@Preamble{
"\input bibnames.sty " #
"\input path.sty " #
"\def \TM {${}^{\sc TM}$} " #
"\ifx \undefined \circled \def \circled #1{(#1)} \fi" #
"\ifx \undefined \reg \def \reg {\circled{R}} \fi" #
"\hyphenation{ }"
}
@String{ack-nhfb = "Nelson H. F. Beebe,
University of Utah,
Department of Mathematics, 110 LCB,
155 S 1400 E RM 233,
Salt Lake City, UT 84112-0090, USA,
Tel: +1 801 581 5254,
FAX: +1 801 581 4148,
e-mail: \path|beebe@math.utah.edu|,
\path|beebe@acm.org|,
\path|beebe@computer.org| (Internet),
URL: \path|https://www.math.utah.edu/~beebe/|"}
@String{j-SIGPLAN = "ACM SIG{\-}PLAN Notices"}
@String{pub-ACM = "ACM Press"}
@String{pub-ACM:adr = "New York, NY, USA"}
@String{pub-AW = "Ad{\-d}i{\-s}on-Wes{\-l}ey"}
@String{pub-AW:adr = "Reading, MA, USA"}
@String{ser-SIGPLAN = "ACM SIG{\-}PLAN Notices"}
@Article{Gershenfeld:2010:RAL,
author = "Neil Gershenfeld and David Dalrymple and Kailiang Chen
and Ara Knaian and Forrest Green and Erik D. Demaine
and Scott Greenwald and Peter Schmidt-Nielsen",
title = "Reconfigurable asynchronous logic automata: {(RALA)}",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "1--6",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Atig:2010:VPW,
author = "Mohamed Faouzi Atig and Ahmed Bouajjani and Sebastian
Burckhardt and Madanlal Musuvathi",
title = "On the verification problem for weak memory models",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "7--18",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Koskinen:2010:CGT,
author = "Eric Koskinen and Matthew Parkinson and Maurice
Herlihy",
title = "Coarse-grained transactions",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "19--30",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Attiya:2010:SVS,
author = "H. Attiya and G. Ramalingam and N. Rinetzky",
title = "Sequential verification of serializability",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "31--42",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Godefroid:2010:CMM,
author = "Patrice Godefroid and Aditya V. Nori and Sriram K.
Rajamani and Sai Deep Tetali",
title = "Compositional may-must program analysis: unleashing
the power of alternation",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "43--56",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chaudhuri:2010:CAP,
author = "Swarat Chaudhuri and Sumit Gulwani and Roberto
Lublinerman",
title = "Continuity analysis of programs",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "57--70",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Harris:2010:PAS,
author = "William R. Harris and Sriram Sankaranarayanan and
Franjo Ivan{\v{c}}i{\'c} and Aarti Gupta",
title = "Program analysis via satisfiability modulo path
programs",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "71--82",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tristan:2010:SVV,
author = "Jean-Baptiste Tristan and Xavier Leroy",
title = "A simple, verified validator for software pipelining",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "83--92",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chlipala:2010:VCI,
author = "Adam Chlipala",
title = "A verified compiler for an impure functional
language",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "93--106",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Myreen:2010:VJT,
author = "Magnus O. Myreen",
title = "Verified just-in-time compiler on {x86}",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "107--118",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Terauchi:2010:DTC,
author = "Tachio Terauchi",
title = "Dependent types from counterexamples",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "119--130",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Rondon:2010:LLL,
author = "Patrick Maxim Rondon and Ming Kawaguchi and Ranjit
Jhala",
title = "Low-level liquid types",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "131--144",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Schafer:2010:TID,
author = "Max Sch{\"a}fer and Oege de Moor",
 title = "Type inference for {Datalog} with complex type
hierarchies",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "145--156",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Henzinger:2010:BQN,
author = "Thomas A. Henzinger",
title = "From {Boolean} to quantitative notions of
correctness",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "157--158",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pitts:2010:NS,
author = "Andrew M. Pitts",
title = "Nominal system {T}",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "159--170",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hobor:2010:TIA,
author = "Aquinas Hobor and Robert Dockins and Andrew W.
Appel",
title = "A theory of indirection via approximation",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "171--184",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dreyer:2010:RML,
author = "Derek Dreyer and Georg Neis and Andreas Rossberg and
Lars Birkedal",
title = "A relational modal logic for higher-order stateful
{ADTs}",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "185--198",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Suter:2010:DPA,
author = "Philippe Suter and Mirco Dotta and Viktor Kuncak",
title = "Decision procedures for algebraic data types with
abstractions",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "199--210",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Magill:2010:ANA,
author = "Stephen Magill and Ming-Hsien Tsai and Peter Lee and
Yih-Kuen Tsay",
title = "Automatic numeric abstractions for heap-manipulating
programs",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "211--222",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jost:2010:SDQ,
author = "Steffen Jost and Kevin Hammond and Hans-Wolfgang Loidl
and Martin Hofmann",
title = "Static determination of quantitative resource usage
for higher-order programs",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "223--236",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Malecha:2010:TVR,
author = "Gregory Malecha and Greg Morrisett and Avraham Shinnar
and Ryan Wisnesky",
title = "Toward a verified relational database management
system",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "237--248",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Podelski:2010:CGF,
author = "Andreas Podelski and Thomas Wies",
title = "Counterexample-guided focus",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "249--260",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Nanevski:2010:SVH,
author = "Aleksandar Nanevski and Viktor Vafeiadis and Josh
Berdine",
title = "Structuring the verification of heap-manipulating
programs",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "261--274",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jia:2010:DTP,
author = "Limin Jia and Jianzhou Zhao and Vilhelm Sj{\"o}berg
and Stephanie Weirich",
title = "Dependent types and program equivalence",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "275--286",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hutchins:2010:PSS,
author = "DeLesley S. Hutchins",
title = "Pure subtype systems",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "287--298",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gay:2010:MST,
author = "Simon J. Gay and Vasco T. Vasconcelos and Ant{\'o}nio
Ravara and Nils Gesbert and Alexandre Z. Caldeira",
title = "Modular session types for distributed object-oriented
programming",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "299--312",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Srivastava:2010:PVP,
author = "Saurabh Srivastava and Sumit Gulwani and Jeffrey S.
Foster",
title = "From program verification to program synthesis",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "313--326",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Vechev:2010:AGS,
author = "Martin Vechev and Eran Yahav and Greta Yorsh",
title = "Abstraction-guided synthesis of synchronization",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "327--338",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bodik:2010:PAN,
author = "Rastislav Bodik and Satish Chandra and Joel Galenson
and Doug Kimelman and Nicholas Tung and Shaon Barman
and Casey Rodarmor",
title = "Programming with angelic nondeterminism",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "339--352",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Greenberg:2010:CMM,
author = "Michael Greenberg and Benjamin C. Pierce and Stephanie
Weirich",
title = "Contracts made manifest",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "353--364",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Siek:2010:TB,
author = "Jeremy G. Siek and Philip Wadler",
title = "Threesomes, with and without blame",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "365--376",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wrigstad:2010:ITU,
author = "Tobias Wrigstad and Francesco Zappa Nardelli and
Sylvain Lebresne and Johan {\"O}stlund and Jan Vitek",
title = "Integrating typed and untyped code in a scripting
language",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "377--388",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tate:2010:GCO,
author = "Ross Tate and Michael Stepp and Sorin Lerner",
title = "Generating compiler optimizations from proofs",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "389--402",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dias:2010:AGI,
author = "Jo{\~a}o Dias and Norman Ramsey",
title = "Automatically generating instruction selectors using
declarative machine descriptions",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "403--416",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jim:2010:SAD,
author = "Trevor Jim and Yitzhak Mandelbaum and David Walker",
title = "Semantics and algorithms for data-dependent grammars",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "417--430",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Broberg:2010:PRB,
author = "Niklas Broberg and David Sands",
title = "{Paralocks}: role-based information flow control and
beyond",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "431--444",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bhargavan:2010:MVS,
author = "Karthikeyan Bhargavan and C{\'e}dric Fournet and
Andrew D. Gordon",
title = "Modular verification of security protocol code by
typing",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "445--456",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Martin:2010:DCO,
 author = "Jean-Philippe Martin and Michael Hicks and Manuel
Costa and Periklis Akritidis and Miguel Castro",
title = "Dynamically checking ownership policies in concurrent
{C}\slash {C++} programs",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "457--470",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Heizmann:2010:NI,
author = "Matthias Heizmann and Jochen Hoenicke and Andreas
Podelski",
title = "Nested interpolants",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "471--482",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Filinski:2010:MA,
author = "Andrzej Filinski",
title = "Monads in action",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "483--494",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kobayashi:2010:HOM,
author = "Naoki Kobayashi and Naoshi Tabuchi and Hiroshi Unno",
title = "Higher-order multi-parameter tree transducers and
recursion schemes for program verification",
journal = j-SIGPLAN,
volume = "45",
number = "1",
pages = "495--508",
month = jan,
year = "2010",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Mar 15 19:13:16 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Nikhil:2010:UGP,
author = "Rishiyur S. Nikhil",
title = "Using {GPCE} principles for hardware systems and
accelerators: (bridging the gap to {HW} design)",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "1--2",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621608",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Moore's Law has precipitated a crisis in the creation
 of hardware systems (ASICs and FPGAs) --- how to design
 such enormously complex concurrent systems quickly,
 reliably and affordably? At the same time, portable
 devices, the energy crisis, and high performance
 computing present a related challenge --- how to move
complex and high-performance algorithms from software
into hardware (for more speed and/or energy
efficiency)?\par
In this talk I will start with a brief technical
introduction to BSV, a language that directly addresses
these concerns. It uses ideas from Guarded Atomic
Actions (cf. Term Rewriting Systems, TLA+, Unity, and
EventB) to address complex concurrency with
scalability. It borrows from Haskell (types, type
classes, higher-order functions) for robustness and
powerful program generation (a.k.a. 'static
elaboration' to HW designers). And it is fully
synthesizable (compilable) into high-quality RTL
(Verilog/VHDL). I will then describe some of the
remarkable projects that BSV has enabled in industry
and academia today.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "Bluespec Systemverilog; BSV; energy efficient
computing; FPGA; hardware accelerators;
hardware/software codesign; Haskell; high level
synthesis; high performance computing; hybrid
computing; term rewriting systems",
}
@Article{Cordy:2010:EOO,
author = "James R. Cordy",
title = "Eating our own dog food: {DSLs} for generative and
transformational engineering",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "3--4",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837852.1621609",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Languages and systems to support generative and
transformational solutions have been around a long
time. Systems such as XVCL, DMS, ASF+SDF, Stratego and
TXL have proven mature, efficient and effective in a
wide range of applications. Even so, adoption remains a
 serious issue --- almost all successful production
applications of these systems in practice either
involve help from the original authors or years of
experience to get rolling. While work on accessibility
is active, with efforts such as ETXL, Stratego XT,
 Rascal and Colm, the fundamental big step remains ---
it's not obvious how to apply a general purpose
transformational system to any given generation or
transformation problem, and the real power is in the
paradigms of use, not the languages themselves.\par
In this talk I will propose an agenda for addressing
this problem by taking our own advice - designing and
implementing domain specific languages (DSLs) for
specific generative, transformational and analysis
problem domains. We widely advise end users of the need
 for DSLs for their kinds of problems --- why not for our
 kinds? And we use our tools for implementing their DSLs
 --- why not our own? I will outline a general method for
using transformational techniques to implement
transformational and generative DSLs, and review
applications of the method to implementing example
text-based DSLs for model-based code generation and
static code analysis. Finally, I will outline some
first steps in implementing model transformation DSLs
using the same idea - retaining the maturity and
efficiency of our existing tools while bringing them to
the masses by 'eating our own dogfood'.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "domain-specific languages; generative programming;
model driven engineering; source transformation
systems",
}
@Article{Willcock:2010:RGP,
author = "Jeremiah James Willcock and Andrew Lumsdaine and
Daniel J. Quinlan",
title = "Reusable, generic program analyses and
transformations",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "5--14",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621611",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The optimizations in modern compilers are constructed
for a predetermined set of primitive types. As a
result, programmers are unable to exploit optimizations
for user-defined types where these optimizations would
be correct and beneficial. Moreover, because the set of
optimizations is also fixed, programmers are unable to
incorporate new optimizations into the compiler. To
address these limitations, we apply the reuse
methodologies from generic programming to compiler
analyses and optimizations. To enable compilers to
apply optimizations to classes of types rather than
particular types, we define optimizations in terms of
generic interface descriptions (similar to C++ concepts
or Haskell type classes). By extending these interface
descriptions to include associated program analysis and
transformation fragments, we enable compilers to
incorporate user-defined transformations and analyses.
Since these transformations are explicitly associated
with interface descriptions, they can be applied in
generic fashion by the compiler. We demonstrate that
classical compiler optimizations, when generalized
using this framework, can apply to a broad range of
types, both built-in and user-defined. Finally, we
present an initial implementation, the principles of
which are generalizable to other compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compiler optimization; generic programming",
}
@Article{Bagge:2010:ASB,
author = "Anya Helene Bagge and Valentin David and Magne
Haveraaen",
title = "The axioms strike back: testing with concepts and
axioms in {C++}",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "15--24",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621612",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
 abstract = "Modern development practices encourage extensive
testing of code while it is still under development,
using unit tests to check individual code units in
isolation. Such tests are typically case-based,
checking a likely error scenario or an error that has
previously been identified and fixed. Coming up with
good test cases is challenging, and focusing on
individual tests can distract from creating tests that
cover the full functionality.\par
Axioms, known from program specification, allow for an
alternative way of generating test cases, where the
intended functionality is described as rules or
equations that can be checked automatically. Axioms are
proposed as part of the {\em concept\/} feature of the
upcoming C++0x standard.\par
In this paper, we describe how tests may be generated
automatically from axioms in C++ concepts, and supplied
with appropriate test data to form effective automated
unit tests.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "algebraic specification; axiom-based testing; axioms;
C++; C++0x; concepts; generative programming; mouldable
programming; program transformation; test generation;
unit testing",
}
@Article{Garcia:2010:TFT,
author = "Ronald Garcia and Andrew Lumsdaine",
title = "Toward foundations for type-reflective
metaprogramming",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "25--34",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621613",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "C++ template metaprogramming has been used with great
success to build software applications and libraries.
In practice, however, template metaprogramming suffers
usability, reliability, and capability shortcomings,
and it is not well understood in theory. Template
metaprogramming has these problems because it relies on
emergent properties of disparate language features that
were tailored to other purposes. As a step toward solid
and sound language support for metaprogramming, this
paper establishes firm semantic foundations for select
capabilities of template metaprogramming.\par
We analyze C++ and the idioms of template
metaprogramming and isolate, in a language-neutral
fashion, fundamental capabilities of C++ that enable
metaprogramming. Guided by this analysis, we present a
design for a core calculus that directly expresses
fundamental metaprogramming capabilities, including
static computation, code generation, and type
 reflection. We prove a type-safety property for
compile-time evaluation of metaprograms. To formally
connect the core calculus to programming practice, we
present a more convenient surface language for
metaprogramming. Its semantics are captured by
type-directed translation to the core calculus. We
prove that this translation preserves
well-typing.\par
This idealized presentation averts some of the
shortcomings of C++ template metaprogramming and
provides a framework for further study.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "c++; metaprogramming; reflection; semantics",
}
@Article{Sadat-Mohtasham:2010:TPD,
author = "Hossein Sadat-Mohtasham and H. James Hoover",
title = "Transactional pointcuts: designation reification and
advice of interrelated join points",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "35--44",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837852.1621615",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Aspect-oriented mechanisms are characterized by their
join point models. A join point model has three
components: join points, which are elements of language
semantics; 'a means of identifying join points'; and 'a
means of affecting the behaviour at those join points.'
A pointcut-advice model is a dynamic join point model
in which join points are points in program execution.
Pointcuts select a set of join points, and advice
affects the behaviour of the selected join points. In
this model, join points are typically selected and
advised independently of each other. That is, the
relationships between join points are not taken into
account in join point selection and advice. In
practice, join points are often not independent.
Instead, they form part of a higher-level operation
that implements the intent of the developer ({\em
e.g.\/} managing a resource). There are natural
situations in which join points should be selected only
if they play a specific role in that operation.\par
We propose a new join point model that takes join point
interrelationships into account and allows the
designation of more complex computations as join
points. Based on the new model, we have designed an
aspect-oriented construct called a {\em transactional
pointcut (transcut)}. Transcuts select sets of
interrelated join points and reify them into
higher-level join points that can be advised. They
share much of the machinery and intuition of pointcuts,
and can be viewed as their natural extension. We have
implemented a transcuts prototype as an extension to
the AspectJ language and integrated it into the abc
compiler. We present an example where a transcut is
applied to implement recommended resource handling
practices in the presence of exceptions within method
boundaries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "aspect-oriented programming; join point model;
transactional pointcut",
}
@Article{Akai:2010:EAS,
author = "Shumpei Akai and Shigeru Chiba",
title = "Extending {AspectJ} for separating regions",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "45--54",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837852.1621616",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Synchronization is a good candidate for an aspect in
aspect-oriented programming (AOP) since programmers
have to choose the best granularity of synchronization
for the underlying hardware to obtain the best
execution performance. If synchronization is an aspect,
programmers can change the synchronization code
independently of the rest of the program when the
program runs on different hardware. However, existing
AOP languages such as AspectJ have problems. They
cannot select an arbitrary code region as a join point.
Moreover, they cannot enforce weaving of a
synchronization aspect. Since it is an alternative
 feature in feature modeling, at least one of the available
synchronization aspects must be woven. Otherwise, the
program would be thread-unsafe. Since an aspect in
AspectJ is inherently optional, programmers must be
responsible for weaving it. To solve these problems,
this paper proposes two new constructs for AspectJ,
{\em regioncut\/} and {\em assertions for advice}.
 A regioncut selects an arbitrary code region as a join
 point, and an assertion for advice enforces the weaving of a mandatory
advice. We implemented these constructs by extending
the AspectBench compiler. We evaluated the design of
our constructs by applying them to two open-source
software products, Javassist and Hadoop.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "aspect-oriented programming; feature-oriented
programming; region; synchronization",
}
@Article{Liu:2010:LFI,
author = "Yanhong A. Liu and Michael Gorbovitski and Scott D.
Stoller",
title = "A language and framework for invariant-driven
transformations",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "55--64",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837852.1621617",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes a language and framework that
allow coordinated transformations driven by invariants
to be specified declaratively, as invariant rules, and
applied automatically. The framework supports
incremental maintenance of invariants for program
design and optimization, as well as general
transformations for instrumentation, refactoring, and
other purposes. This paper also describes our
implementations for transforming Python and C programs
and experiments with successful applications of the
systems in generating efficient implementations from
clear and modular specifications, in instrumenting
programs for runtime verification, profiling, and
debugging, and in code refactoring.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "incremental maintenance; invariants; program
optimization; program transformation; runtime invariant
checking",
}
@Article{Wehr:2010:JBP,
author = "Stefan Wehr and Peter Thiemann",
title = "{JavaGI} in the battlefield: practical experience with
generalized interfaces",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "65--74",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621619",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Generalized interfaces are an extension of the
interface concept found in object-oriented languages
such as Java or C\#. The extension is inspired by
Haskell's type classes. It supports retroactive and
type-conditional interface implementations, binary
methods, symmetric multimethods, interfaces over
families of types, and static interface
methods.\par
This article reports practical experience with
generalized interfaces as implemented in the JavaGI
language. Several real-world case studies demonstrate
how generalized interfaces provide solutions to
extension and integration problems with components in
binary form, how they make certain design patterns
redundant, and how they eliminate various run-time
errors. In each case study, the use of JavaGI results
in elegant and highly readable code.\par
Furthermore, the article discusses the implementation
of a compiler and a run-time system for JavaGI.
Benchmarks show that our implementation offers
acceptable performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "case studies; external methods; JavaGI; multimethods;
retroactive interface implementation",
}
@Article{McGachey:2010:CJC,
author = "Phil McGachey and Antony L. Hosking and J. Eliot B.
Moss",
title = "Classifying {Java} class transformations for pervasive
virtualized access",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "75--84",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621620",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The indirection of object accesses is a common theme
for target domains as diverse as transparent
distribution, persistence, and program instrumentation.
Virtualizing accesses to fields and methods (by
redirecting calls through accessor and indirection
methods) allows interposition of arbitrary code,
extending the functionality of an application beyond
that intended by the original developer.\par
We present class modifications performed by our RuggedJ
transparent distribution platform for standard Java
virtual machines. RuggedJ abstracts over the location
of objects by implementing a single object model for
local and remote objects. However the implementation of
this model is complicated by the presence of native and
system code; classes loaded by Java's bootstrap class
loader can be rewritten only in a limited manner, and
so cannot be modified to conform to RuggedJ's complex
object model. We observe that system code comprises the
majority of a given Java application: an average of
76\% in the applications we study. We consider the
constraints imposed upon pervasive class transformation
within Java, and present a framework for systematically
rewriting arbitrary applications. Our system
accommodates all system classes, allowing both user and
system classes alike to be referenced using a single
object model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "Java; object model; program transformation",
}
@Article{Villazon:2010:ARA,
author = "Alex Villaz{\'o}n and Walter Binder and Danilo
Ansaloni and Philippe Moret",
title = "Advanced runtime adaptation for {Java}",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "85--94",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621621",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic aspect-oriented programming (AOP) enables
runtime adaptation of aspects, which is important for
building sophisticated, aspect-based software
engineering tools, such as adaptive profilers or
debuggers that dynamically modify instrumentation code
in response to user interactions. Today, many AOP
frameworks for Java, notably AspectJ, focus on aspect
weaving at compile-time or at load-time, and offer only
limited support for aspect adaptation and reweaving at
runtime. In this paper, we introduce HotWave, an AOP
framework based on AspectJ for standard Java Virtual
Machines (JVMs). HotWave supports dynamic (re)weaving
of previously loaded classes, and it ensures that all
classes loaded in a JVM can be (re)woven, including the
classes of the standard Java class library. HotWave
features a novel mechanism for inter-advice
communication, enabling efficient data passing between
advices that are woven into the same method. We explain
HotWave's programming model and discuss our
 implementation techniques. As a case study, we present an
adaptive, aspect-based profiler that leverages
HotWave's distinguishing features.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "AspectJ; bytecode instrumentation; code hotswapping;
dynamic aspect-oriented programming; Java Virtual
Machine; runtime aspect adaptation and (re)weaving",
}
@Article{Villazon:2010:HCA,
author = "Alex Villaz{\'o}n and Walter Binder and Danilo
Ansaloni and Philippe Moret",
title = "{HotWave}: creating adaptive tools with dynamic
aspect-oriented programming in {Java}",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "95--98",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621622",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Developing tools for profiling, debugging, testing,
and reverse engineering is error-prone, time-consuming,
and therefore costly when using low-level techniques,
such as bytecode instrumentation. As a solution to
these problems, we promote tool development in Java
using high-level aspect-oriented programming (AOP). We
demonstrate that the use of aspects yields compact
tools that are easy to develop and extend. As enabling
technology, we rely on HotWave, a new tool for dynamic
and comprehensive aspect weaving. HotWave reconciles
compatibility with existing virtual machine and AOP
technologies. It provides support for runtime
adaptation of aspects and reweaving of previously
loaded code, as well as the ability to weave aspects
into all methods executing in a Java Virtual Machine,
including methods in the standard Java class library.
HotWave also features a new mechanism for efficiently
passing data between advices that are woven into the
same method. We demonstrate the benefits of HotWave's
distinguishing features with two case studies in the
area of profiling.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "AspectJ; bytecode instrumentation; code hotswapping;
dynamic aspect-oriented programming; Java Virtual
Machine; profiling; runtime weaving",
}
@Article{Heidenreich:2010:GST,
author = "Florian Heidenreich and Jendrik Johannes and Mirko
Seifert and Christian Wende and Marcel B{\"o}hme",
title = "Generating safe template languages",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "99--108",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621624",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Template languages are widely used within generative
programming, because they provide intuitive means to
generate software artefacts expressed in a specific
object language. However, most template languages
perform template instantiation on the level of string
literals, which allows neither syntax checks nor
 semantic analysis. To make sure that generated
artefacts always conform to the object language, we
propose to perform static analysis at template design
time. In addition, the increasing popularity of
domain-specific languages (DSLs) demands an approach
 that allows reuse of both the concepts of template
languages and the corresponding tools.\par
In this paper we address the issues mentioned above by
presenting how existing languages can be automatically
extended with generic template concepts (e.g.,
placeholders, loops, conditions) to obtain safe
template languages. These languages provide means for
syntax checking and static semantic analysis w.r.t. the
object language at template design time. We discuss the
prerequisites for this extension, analyse the types of
correctness properties that can be assured at template
design time, and exemplify the key benefits of this
approach on a textual DSL and Java.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "generative programming; language extension; safe
authoring; template language",
}
@Article{Kong:2010:APT,
author = "Soonho Kong and Wontae Choi and Kwangkeun Yi",
title = "Abstract parsing for two-staged languages with
concatenation",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "109--116",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621625",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This article, based on Doh, Kim, and Schmidt's
'abstract parsing' technique, presents an abstract
interpretation for statically checking the syntax of
generated code in two-staged programs. Abstract parsing
is a static analysis technique for checking the syntax
of generated strings. We adopt this technique for
two-staged programming languages and formulate it in
the abstract interpretation framework. We parameterize
our analysis with the abstract domain so that one can
choose the abstract domain as long as it satisfies the
condition we provide. We also present an instance of
the abstract domain, namely an abstract parse stack and
its widening with k-cutting.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "abstract interpretation; multi-staged languages;
parsing; program analysis",
}
@Article{Nedunuri:2010:SFP,
author = "Srinivas Nedunuri and William R. Cook",
title = "Synthesis of fast programs for maximum segment sum
problems",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "117--126",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837852.1621626",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is well-known that a naive algorithm can often be
turned into an efficient program by applying
appropriate semantics-preserving transformations. This
technique has been used to derive programs to solve a
 variety of maximum-sum problems. One problem with this
approach is that each problem variation requires a new
set of transformations to be derived. An alternative
approach to generation combines problem specifications
with flexible algorithm theories to derive efficient
algorithms. We show how this approach can be
implemented in Haskell and applied to solve constraint
satisfaction problems. We illustrate this technique by
deriving programs for three varieties of
 maximum-weight-sum problem. The derivations of the
different programs are similar, and the resulting
programs are asymptotically faster in practice than the
programs created by transformation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "algorithms; branch-and-bound; formal methods; program
synthesis; segment-sum problems",
}
@Article{Radermacher:2010:GEI,
 author = "Ansgar Radermacher and Arnaud Cuccuru and S{\'e}bastien
 G{\'e}rard and Fran{\c{c}}ois Terrier",
title = "Generating execution infrastructures for
component-oriented specifications with a model driven
toolchain: a case study for {MARTE}'s {GCM} and
real-time annotations",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "127--136",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621628",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
 abstract = "The development of embedded systems is becoming more
 and more complex. Model-driven engineering can help to
 manage this complexity by specifying real-time
 properties in a declarative way and automating the
 deployment. The UML profile MARTE is an OMG standard
 that allows real-time properties to be modeled. However,
 there is no execution infrastructure that supports
 MARTE's generic component model (GCM) and its
 high-level application modeling (HLAM).\par
 The contribution of the paper is twofold: first, it
 proposes a component model with flexible interaction
 support that allows code generation to be tailored to
 domain and target requirements. Second, it shows how
 MARTE's GCM concepts can be implemented by means of the
 proposed component model. The proposed component model
 has been largely developed in the context of the French
 national project Flex-eWare with the intention of
 unifying major component models, notably the CORBA
 component model (CCM) and Fractal. The paper explains
 the major elements of this model in detail and shows
 how specific connectors and containers can implement
 MARTE specifications. We present the tool support,
 which is integrated into a UML modeler and based on
 model-to-model and model-to-text transformations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "component models; connectors; MARTE; middleware;
model-driven engineering",
}
@Article{Cassou:2010:GPA,
author = "Damien Cassou and Benjamin Bertran and Nicolas Loriant
and Charles Consel",
title = "A generative programming approach to developing
pervasive computing systems",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "137--146",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621629",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Developing pervasive computing applications is a
difficult task because it requires dealing with a wide
range of issues: heterogeneous devices, entity
distribution, entity coordination, low-level hardware
knowledge. \ldots{} Besides requiring various areas of
expertise, programming such applications involves
writing a lot of administrative code to glue
technologies together and to interface with both
hardware and software components.\par
This paper proposes a generative programming approach
to providing programming, execution and simulation
support dedicated to the pervasive computing domain.
This approach relies on a domain-specific language,
named DiaSpec, dedicated to the description of
pervasive computing systems. Our generative approach
factors out features of distributed systems
technologies, making DiaSpec-specified software systems
portable.\par
The DiaSpec compiler is implemented and has been used
to generate dedicated programming frameworks for a
variety of pervasive computing applications, including
detailed ones to manage the building of an engineering
school.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "DSL; generative programming; pervasive computing",
}
@Article{Jarvi:2010:AUI,
author = "Jaakko J{\"a}rvi and Mat Marcus and Sean Parent and
John Freeman and Jacob Smith",
title = "Algorithms for user interfaces",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "147--156",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621630",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "User interfaces for modern applications must support a
rich set of interactive features. It is commonplace to
find applications with dependencies between values
manipulated by user interface elements, conditionally
enabled controls, and script recordability and
playback against different documents. A significant
fraction of the application programming effort is
devoted to implementing such functionality, and the
resulting code is typically not reusable.\par
This paper extends our 'property models' approach to
programming user interfaces. Property models allow a
large part of the functionality of a user interface to
be implemented in reusable libraries, reducing
application-specific code to a set of declarative
rules. We describe how, as a by-product of computations
that maintain the values of user interface elements,
property models obtain accurate information about the
currently active dependencies among those elements.
This information enables further expanding the class of
user interface functionality that we can encode as
generic algorithms. In particular, we describe
automating the decisions for the enablement of user
interface widgets and activation of command widgets.
Failing to disable or deactivate widgets correctly is a
common source of user-interface defects, which our
approach largely removes.\par
We report on the increased reuse, reduced defect rates,
and improved user interface design turnarounds in a
commercial software development effort as a result of
adopting our approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "component software; constraint systems; declarative
specifications; software reuse; user interfaces",
}
@Article{Kastner:2010:MRP,
author = "Christian K{\"a}stner and Sven Apel and Martin
Kuhlemann",
title = "A model of refactoring physically and virtually
separated features",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "157--166",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837852.1621632",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Physical separation with class refinements and method
refinements {\`a} la AHEAD and virtual separation using
annotations {\`a} la {\em \#ifdef\/} or CIDE are two
competing implementation approaches for software
product lines with complementary advantages. Although
both approaches have been mainly discussed in
isolation, we strive for an integration to leverage the
respective advantages. In this paper, we lay the
foundation for such an integration by providing a model
that supports both physical and virtual separation and
by describing refactorings in both directions. We prove
the refactorings complete, so every virtually separated
product line can be automatically transformed into a
physically separated one (replacing annotations by
refinements) and vice versa. To demonstrate the
feasibility of our approach, we have implemented the
refactorings in our tool CIDE and conducted four case
studies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "AHEAD; CIDE; FeatureHouse; preprocessor; refinements;
separation of concerns; software product lines",
}
@Article{Sanen:2010:MPS,
author = "Frans Sanen and Eddy Truyen and Wouter Joosen",
title = "Mapping problem-space to solution-space features: a
feature interaction approach",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "167--176",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837852.1621633",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mapping problem-space features into solution-space
features is a fundamental configuration problem in
software product line engineering. A configuration
problem is defined as generating the optimal
combination of software features given a requirements
specification and a set of configuration rules.
Current approaches, however, provide little support for
expressing complex configuration rules between problem
and solution space that support incomplete requirements
specifications. In this paper, we propose an approach
to model complex configuration rules based on a
generalization of the concept of problem-solution
feature interactions. These are interactions between
solution-space features that only arise in specific
problem contexts. The use of an existing tool to
support our approach is also discussed: we use the DLV
answer set solver to express a particular configuration
problem as a logic program whose answer set corresponds
to the optimal combinations of solution-space features.
We motivate and illustrate our approach with a case
study in the field of managing dynamic adaptations in
distributed software, where the goal is to generate an
optimal protocol for accommodating a given
adaptation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "configuration knowledge; default logic; distributed
runtime adaptation; DLV; problem-solution feature
interactions; software product line engineering",
}
@Article{Kuhlemann:2010:SCN,
author = "Martin Kuhlemann and Don Batory and Christian
K{\"a}stner",
title = "Safe composition of non-monotonic features",
journal = j-SIGPLAN,
volume = "45",
number = "2",
pages = "177--186",
month = feb,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1621607.1621634",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:37:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programs can be composed from features. We want to
verify automatically that all legal combinations of
features can be composed safely without errors. Prior
work on this problem assumed that features add code
monotonically. We generalize prior work to enable
features to add {\em and remove\/} code, describe our
analyses and implementation, and review case studies.
We observe that more expressive features increase the
complexity of developed programs rapidly -- up to the
point where tools and automated concepts as presented
in this paper are indispensable for verification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "AHEAD; feature-oriented programming; refactoring; safe
composition",
}
@Article{Brewer:2010:TDR,
author = "Eric A. Brewer",
title = "Technology for developing regions: {Moore}'s law is
not enough",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "1--2",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736021",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The historic focus of development has rightfully been
on macroeconomics and good governance, but technology
has an increasingly large role to play. In this talk, I
review several novel technologies that we have deployed
in India and Africa, and discuss the challenges and
opportunities of this new subfield of EECS research.
Working with the Aravind Eye Hospital, we are currently
supporting doctor/patient videoconferencing in 30+
rural villages; more than 25,000 people have had their
blindness cured due to these exams.\par
Although Moore's Law has led to great cost reductions
and thus enabled new technologies, we have essentially
reached the low point for cost: the computing is nearly
free compared to the rest of the system. The premium is
thus on a combination of (1) deeper integration (fewer
components), (2) shared usage models (even phones are
shared), and (3) lower operating costs in terms of power
and connectivity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "developing regions; ictd; it for development.",
}
@Article{Ipek:2010:DRM,
author = "Engin Ipek and Jeremy Condit and Edmund B. Nightingale
and Doug Burger and Thomas Moscibroda",
title = "Dynamically replicated memory: building reliable
systems from nanoscale resistive memories",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "3--14",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736023",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "DRAM is facing severe scalability challenges in
sub-45nm technology nodes due to precise charge
placement and sensing hurdles in deep-submicron
geometries. Resistive memories, such as phase-change
memory (PCM), already scale well beyond DRAM and are a
promising DRAM replacement. Unfortunately, PCM is
write-limited, and current approaches to managing
writes must decommission pages of PCM when the first
bit fails.\par
This paper presents {\em dynamically replicated
memory\/} (DRM), the first hardware and operating
system interface designed for PCM that allows {\em
continued operation through graceful degradation\/}
when hard faults occur. DRM reuses memory pages that
contain hard faults by dynamically forming pairs of
complementary pages that act as a single page of
storage. No changes are required to the processor
cores, the cache hierarchy, or the operating system's
page tables. By changing the memory controller, the
TLBs, and the operating system to be DRM-aware, we can
improve the lifetime of PCM by up to 40x over
conventional error-detection techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "phase-change memory; write endurance",
}
@Article{Kirman:2010:PEA,
author = "Nevin Kirman and Jos{\'e} F. Mart{\'\i}nez",
title = "A power-efficient all-optical on-chip interconnect
using wavelength-based oblivious routing",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "15--28",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736024",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an all-optical approach to constructing
data networks on chip that combines the following key
features: (1) Wavelength-based routing, where the route
followed by a packet depends solely on the wavelength
of its carrier signal, and not on information either
contained in the packet or traveling along with it. (2)
Oblivious routing, by which the wavelength (and thus
the route) employed to connect a source-destination
pair is invariant for that pair, and does not depend on
ongoing transmissions by other nodes, thereby
simplifying design and operation. And (3) passive
optical wavelength routers, whose routing pattern is
set at design time, which allows for area and power
optimizations not generally available to solutions that
use dynamic routing. Compared to prior proposals, our
evaluation shows that our solution is significantly
more power efficient at a similar level of
performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "nanophotonics; on-chip network; optical network;
wavelength-based oblivious routing",
}
@Article{Neelakantam:2010:RSE,
author = "Naveen Neelakantam and David R. Ditzel and Craig
Zilles",
title = "A real system evaluation of hardware atomicity for
software speculation",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "29--38",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736026",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper we evaluate the atomic region compiler
abstraction by incorporating it into a commercial
system. We find that atomic regions are simple and
intuitive to integrate into an x86 binary-translation
system. Furthermore, doing so trivially enables
additional optimization opportunities beyond that
achievable by a high-performance dynamic optimizer,
which already implements superblocks.\par
We show that atomic regions can suffer from severe
performance penalties if misspeculations are left
uncontrolled, but that a simple software control
mechanism is sufficient to rein in all detrimental
side-effects. We evaluate using full reference runs of
the SPEC CPU2000 integer benchmarks and find that
atomic regions enable up to a 9\% (3\% on average)
improvement beyond the performance of a tuned
product.\par
These performance improvements are achieved without any
negative side effects. Performance side effects such as
code bloat are absent with atomic regions; in fact,
static code size is reduced. The hardware necessary is
synergistic with other needs and was already available
on the commercial product used in our evaluation.
Finally, the software complexity is minimal as a single
developer was able to incorporate atomic regions into a
sophisticated 300,000-line code base in three months,
despite never having seen the translator source code
beforehand.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "atomicity; checkpoint; dynamic translation;
optimization; speculation",
}
@Article{Harris:2010:DFM,
author = "Tim Harris and Sa{\v{s}}a Tomic and Adri{\'a}n Cristal
and Osman Unsal",
title = "Dynamic filtering: multi-purpose architecture support
for language runtime systems",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "39--52",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736027",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces a new abstraction to accelerate
the read-barriers and write-barriers used by language
runtime systems. We exploit the fact that, dynamically,
many barrier executions perform checks but no real work
-- e.g., in generational garbage collection (GC),
frequent checks are needed to detect the creation of
inter-generational references, even though such
references occur rarely in many workloads. We introduce
a form of dynamic filtering that identifies redundant
checks by (i) recording checks that have recently been
executed, and (ii) detecting when a barrier is
repeating one of these checks. We show how this
technique can be applied to a variety of algorithms for
GC, transactional memory, and language-based security.
By supporting dynamic filtering in the instruction set,
we show that the fast-paths of these barriers can be
streamlined, reducing the impact on the quality of
surrounding code. We show how we accelerate the
barriers used for generational GC and transactional
memory in the Bartok research compiler. With a
2048-entry filter, dynamic filtering eliminates almost
all the overhead of the GC write-barriers. Dynamic
filtering eliminates around half the overhead of STM
over a non-synchronized baseline -- even when used with
an STM that is already designed for low overhead, and
which employs static analyses to avoid redundant
operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "garbage collection; runtime systems; transactional
memory",
}
@Article{Bergan:2010:CCR,
author = "Tom Bergan and Owen Anderson and Joseph Devietti and
Luis Ceze and Dan Grossman",
title = "{CoreDet}: a compiler and runtime system for
deterministic multithreaded execution",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "53--64",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736029",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The behavior of a multithreaded program does not
depend only on its inputs. Scheduling, memory
reordering, timing, and low-level hardware effects all
introduce nondeterminism in the execution of
multithreaded programs. This severely complicates many
tasks, including debugging, testing, and automatic
replication. In this work, we avoid these complications
by eliminating their root cause: we develop a compiler
and runtime system that runs arbitrary multithreaded
C/C++ POSIX Threads programs deterministically.\par
A trivial nonperformant approach to providing
determinism is simply deterministically serializing
execution. Instead, we present a compiler and runtime
infrastructure that ensures determinism but resorts to
serialization rarely, for handling interthread
communication and synchronization. We develop two basic
approaches, both of which are largely dynamic with
performance improved by some static compiler
optimizations. First, an ownership-based approach
detects interthread communication via an evolving table
that tracks ownership of memory regions by threads.
Second, a buffering approach uses versioned memory and
employs a deterministic commit protocol to make changes
visible to other threads. While buffering has larger
single-threaded overhead than ownership, it tends to
scale better (serializing less often). A hybrid system
sometimes performs and scales better than either
approach individually.\par
Our implementation is based on the LLVM compiler
infrastructure. It needs neither programmer annotations
nor special hardware. Our empirical evaluation uses the
PARSEC and SPLASH2 benchmarks and shows that our
approach scales comparably to nondeterministic
execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compilers; determinism; multicore; multithreading",
}
@Article{Raman:2010:SPU,
author = "Arun Raman and Hanjun Kim and Thomas R. Mason and
Thomas B. Jablin and David I. August",
title = "Speculative parallelization using software
multi-threaded transactions",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "65--76",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736030",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the right techniques, multicore architectures may
be able to continue the exponential performance trend
that elevated the performance of applications of all
types for decades. While many scientific programs can
be parallelized without speculative techniques,
speculative parallelism appears to be the key to
continuing this trend for general-purpose applications.
Recently-proposed code parallelization techniques, such
as those by Bridges et al. and by Thies et al.,
demonstrate scalable performance on multiple cores by
using speculation to divide code into atomic units
(transactions) that span multiple threads in order to
expose data parallelism. Unfortunately, most software
and hardware Thread-Level Speculation (TLS) memory
systems and transactional memories are not sufficient
because they only support single-threaded atomic units.
Multi-threaded Transactions (MTXs) address this
problem, but they require expensive hardware support as
currently proposed in the literature. This paper
proposes a Software MTX (SMTX) system that captures the
{\em applicability\/} and {\em performance\/} of
hardware MTX, but on {\em existing multicore machines}.
The SMTX system yields a harmonic mean speedup of
13.36x on native hardware with four 6-core processors
(24 cores in total) running speculatively parallelized
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "automatic parallelization; loop-level parallelism;
multi-threaded transactions; pipelined parallelism;
software transactional memory; thread-level
speculation",
}
@Article{Lee:2010:REO,
author = "Dongyoon Lee and Benjamin Wester and Kaushik
Veeraraghavan and Satish Narayanasamy and Peter M. Chen
and Jason Flinn",
title = "{Respec}: efficient online multiprocessor replay via
speculation and external determinism",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "77--90",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736031",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Deterministic replay systems record and reproduce the
execution of a hardware or software system. While it is
well known how to replay uniprocessor systems,
replaying shared memory multiprocessor systems at low
overhead on commodity hardware is still an open
problem. This paper presents Respec, a new way to
support deterministic replay of shared memory
multithreaded programs on commodity multiprocessor
hardware. Respec targets online replay in which the
recorded and replayed processes execute
concurrently.\par
Respec uses two strategies to reduce overhead while
still ensuring correctness: speculative logging and
externally deterministic replay. Speculative logging
optimistically logs less information about shared
memory dependencies than is needed to guarantee
deterministic replay, then recovers and retries if the
replayed process diverges from the recorded process.
Externally deterministic replay relaxes the degree to
which the two executions must match by requiring only
that their system output and final program states match. We
show that the combination of these two techniques
results in low recording and replay overhead for the
common case of data-race-free execution intervals and
still ensures correct replay for execution intervals
that have data races.\par
We modified the Linux kernel to implement our
techniques. Our software system adds on average about
18\% overhead to the execution time for recording and
replaying programs with two threads and 55\% overhead
for programs with four threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "external determinism; replay; speculative execution",
}
@Article{Eyerman:2010:PJS,
author = "Stijn Eyerman and Lieven Eeckhout",
title = "Probabilistic job symbiosis modeling for {SMT}
processor scheduling",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "91--102",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736033",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Symbiotic job scheduling boosts simultaneous
multithreading (SMT) processor performance by
co-scheduling jobs that have `compatible' demands on
the processor's shared resources. Existing approaches
however require a sampling phase, evaluate a limited
number of possible co-schedules, use heuristics to
gauge symbiosis, are rigid in their optimization
target, and do not preserve system-level
priorities/shares.\par
This paper proposes probabilistic job symbiosis
modeling, which predicts whether jobs will create
positive or negative symbiosis when co-scheduled
without requiring the co-schedule to be evaluated. The
model, which uses per-thread cycle stacks computed
through a previously proposed cycle accounting
architecture, is simple enough to be used in system
software. Probabilistic job symbiosis modeling provides
six key innovations over prior work in symbiotic job
scheduling: (i) it does not require a sampling phase,
(ii) it readjusts the job co-schedule continuously,
(iii) it evaluates a large number of possible
co-schedules at very low overhead, (iv) it is not
driven by heuristics, (v) it can optimize a performance
target of interest (e.g., system throughput or job
turnaround time), and (vi) it preserves system-level
priorities/shares. These innovations make symbiotic job
scheduling both practical and effective.\par
Our experimental evaluation, which assumes a realistic
scenario in which jobs come and go, reports an average
16\% (and up to 35\%) reduction in job turnaround time
compared to the previously proposed SOS (sample,
optimize, symbios) approach for a two-thread SMT
processor, and an average 19\% (and up to 45\%)
reduction in job turnaround time for a four-thread SMT
processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "performance modeling; simultaneous multi-threading
(SMT); symbiotic job scheduling",
}
@Article{Shen:2010:RBV,
author = "Kai Shen",
title = "Request behavior variations",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "103--116",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736034",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A large number of user requests execute (often
concurrently) within a server system. A single request
may exhibit fluctuating hardware characteristics (such
as instruction completion rate and on-chip resource
usage) over the course of its execution, due to
inherent variations in application execution semantics
as well as dynamic resource competition on
resource-sharing processors like multicores.
Understanding such behavior variations can assist
fine-grained request modeling and adaptive resource
management.\par
This paper presents operating system management to
track request behavior variations online. In addition
to metric sample collection during periodic interrupts,
we exploit the frequent system calls in server
applications to perform low-cost in-kernel sampling. We
utilize identified behavior variations to support or
enhance request modeling in request classification,
anomaly analysis, and online request signature
construction. A foundation of our request modeling is
the ability to quantify the difference between two
requests' time series behaviors. We evaluate several
differencing measures and enhance the classic dynamic
time warping technique with additional penalties for
asynchronous warp steps. Finally, motivated by
fluctuating request resource usage and the resulting
contention, we implement contention-easing CPU
scheduling on multicore platforms and demonstrate its
effectiveness in improving the worst-case request
performance.\par
Experiments in this paper are based on five server
applications -- Apache web server, TPCC, TPCH, RUBiS
online auction benchmark, and a user-content-driven
online teaching application called WeBWorK.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "hardware counter; multicore; operating system
adaptation; request modeling; server system",
}
@Article{Johnson:2010:DCM,
author = "F. Ryan Johnson and Radu Stoica and Anastasia Ailamaki
and Todd C. Mowry",
title = "Decoupling contention management from scheduling",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "117--128",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736035",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many parallel applications exhibit unpredictable
communication between threads, leading to contention
for shared objects. The choice of contention management
strategy strongly impacts the performance and
scalability of these applications: spinning provides
maximum performance but wastes significant processor
resources, while blocking-based approaches conserve
processor resources but introduce high overheads on the
critical path of computation. Under situations of high
or changing load, the operating system complicates
matters further with arbitrary scheduling decisions
which often preempt lock holders, leading to long
serialization delays until the preempted thread resumes
execution.\par
We observe that contention management is orthogonal to
the problems of scheduling and load management and
propose to decouple them so each may be solved
independently and effectively. To this end, we propose
a load control mechanism which manages the number of
active threads in the system separately from any
contention which may exist. By isolating contention
management from damaging interactions with the OS
scheduler, we combine the efficiency of spinning with
the robustness of blocking. The proposed load control
mechanism results in stable, high performance for both
lightly and heavily loaded systems, requires no special
privileges or modifications at the OS level, and can be
implemented as a library which benefits existing
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "blocking; concurrency control; contention; load
management; multicore; scheduling; spinning; threads",
}
@Article{Zhuravlev:2010:ASR,
author = "Sergey Zhuravlev and Sergey Blagodurov and Alexandra
Fedorova",
title = "Addressing shared resource contention in multicore
processors via scheduling",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "129--142",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736036",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Contention for shared resources on multicore
processors remains an unsolved problem in existing
systems despite significant research efforts dedicated
to this problem in the past. Previous solutions focused
primarily on hardware techniques and software page
coloring to mitigate this problem. Our goal is to
investigate how and to what extent contention for
shared resource can be mitigated via thread scheduling.
Scheduling is an attractive tool, because it does not
require extra hardware and is relatively easy to
integrate into the system. Our study is the first to
provide a comprehensive analysis of
contention-mitigating techniques that use only
scheduling. The most difficult part of the problem is
to find a classification scheme for threads, which
would determine how they affect each other when
competing for shared resources. We provide a
comprehensive analysis of such classification schemes
using a newly proposed methodology that enables us to
evaluate these schemes separately from the scheduling
algorithm itself and to compare them to the optimal. As
a result of this analysis we discovered a
classification scheme that addresses not only
contention for cache space, but contention for other
shared resources, such as the memory controller, memory
bus and prefetching hardware. To show the applicability
of our analysis we design a new scheduling algorithm,
which we prototype at user level, and demonstrate that
it performs within 2\% of the optimal. We also
conclude that the highest impact of contention-aware
scheduling techniques is not in improving performance
of a workload as a whole but in improving quality of
service or performance isolation for individual
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "multicore processors; scheduling; shared resource
contention",
}
@Article{Yuan:2010:SED,
author = "Ding Yuan and Haohui Mai and Weiwei Xiong and Lin Tan
and Yuanyuan Zhou and Shankar Pasupathy",
title = "{SherLog}: error diagnosis by connecting clues from
run-time logs",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "143--154",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736038",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computer systems often fail due to many factors such
as software bugs or administrator errors. Diagnosing
such production run failures is an important but
challenging task since it is difficult to reproduce
them in house due to various reasons: (1)
unavailability of users' inputs and file content due to
privacy concerns; (2) difficulty in building the exact
same execution environment; and (3) non-determinism of
concurrent executions on
multi-processors.\par
Therefore, programmers often have to diagnose a
production run failure based on logs collected back
from customers and the corresponding source code. Such
diagnosis requires expert knowledge and is also too
time-consuming and tedious to narrow down root causes. To
address this problem, we propose a tool, called
SherLog, that analyzes source code by leveraging
information provided by run-time logs to infer what
must or may have happened during the failed production
run. It requires neither re-execution of the program
nor knowledge of the log's semantics. It infers both
control and data value information regarding the
failed execution.\par
We evaluate SherLog with 8 representative {\em real
world\/} software failures (6 software bugs and 2
configuration errors) from 7 applications including 3
servers. The information inferred by SherLog is very
useful for programmers to diagnose these evaluated
failures. Our results also show that SherLog can
analyze large server applications such as Apache with
thousands of logging messages within only 40 minutes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "failure diagnostics; log; static analysis",
}
@Article{Weeratunge:2010:AMD,
author = "Dasarath Weeratunge and Xiangyu Zhang and Suresh
Jagannathan",
title = "Analyzing multicore dumps to facilitate concurrency
bug reproduction",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "155--166",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736039",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Debugging concurrent programs is difficult. This is
primarily because the inherent non-determinism arising
from scheduler interleavings makes it hard to reproduce
bugs that may manifest only under certain interleavings.
The problem is exacerbated in
multi-core environments where there are multiple
schedulers, one for each core. In this paper, we
propose a reproduction technique for concurrent
programs that execute on multi-core platforms. Our
technique performs a lightweight analysis of a failing
execution that occurs in a multi-core environment, and
uses the result of the analysis to enable reproduction
of the bug in a single-core system, under the control
of a deterministic scheduler.\par
More specifically, our approach automatically
identifies the execution point in the re-execution that
corresponds to the failure point. It does so by
analyzing the failure core dump and leveraging a
technique called {\em execution indexing\/} that
identifies a related point in the re-execution. By
generating a core dump at this point, and comparing the
differences between the two dumps, we are able to guide
a search algorithm to efficiently generate a failure
inducing schedule. Our experiments show that our
technique is highly effective and has reasonable
overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "concurrency bugs; execution indexing; multi-core;
reproduction",
}
@Article{Burckhardt:2010:RSP,
author = "Sebastian Burckhardt and Pravesh Kothari and Madanlal
Musuvathi and Santosh Nagarakatte",
title = "A randomized scheduler with probabilistic guarantees
of finding bugs",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "167--178",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736040",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a randomized scheduler for finding
concurrency bugs. Like current stress-testing methods,
it repeatedly runs a given test program with supplied
inputs. However, it improves on stress-testing by
finding buggy schedules more effectively and by
quantifying the probability of missing concurrency
bugs. Key to its design is the characterization of the
depth of a concurrency bug as the minimum number of
scheduling constraints required to find it. In a single
run of a program with {\em n\/} threads and {\em k\/}
steps, our scheduler detects a concurrency bug of depth
{\em d\/} with probability at least $1/(nk^{d-1})$. We
hypothesize that in practice, many
concurrency bugs (including well-known types such as
ordering errors, atomicity violations, and deadlocks)
have small bug-depths, and we confirm the efficiency of
our schedule randomization by detecting previously
unknown and known concurrency bugs in several
production-scale concurrent programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "concurrency; race conditions; randomized algorithms;
testing",
}
@Article{Zhang:2010:CDS,
author = "Wei Zhang and Chong Sun and Shan Lu",
title = "{ConMem}: detecting severe concurrency bugs through an
effect-oriented approach",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "179--192",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736041",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multicore technology is making concurrent programs
increasingly pervasive. Unfortunately, it is difficult
to deliver reliable concurrent programs, because of the
huge and non-deterministic interleaving space. In
reality, without the resources to thoroughly check the
interleaving space, critical concurrency bugs can slip
into production runs and cause failures in the field.
Approaches to making the best use of the limited
resources and exposing severe concurrency bugs before
software release would be desirable.\par
Unlike previous work that focuses on bugs caused by
specific interleavings (e.g., races and
atomicity-violations), this paper targets concurrency
bugs that result in one type of severe effects: program
crashes. Our study of the error-propagation process of
real-world concurrency bugs reveals a common pattern
(50\% in our non-deadlock concurrency bug set) that is
highly correlated with program crashes. We call this
pattern concurrency-memory bugs: buggy interleavings
directly cause memory bugs (NULL-pointer-dereference,
dangling-pointer, buffer-overflow, uninitialized-read)
on shared memory objects.\par
Guided by this study, we built ConMem to monitor
program execution, analyze memory accesses and
synchronizations, and predictively detect these common
and severe concurrency-memory bugs. We also built a
validator ConMem-v to automatically prune false
positives by enforcing potential bug-triggering
interleavings.\par
We evaluated ConMem using 7 open-source programs with 9
real-world severe concurrency bugs. ConMem detects more
tested bugs (8 out of 9 bugs) than a lock-set-based
race detector and an unserializable-interleaving
detector that detect 4 and 5 bugs respectively, with a
false positive rate about one tenth of the compared
tools. ConMem-v further prunes out all the false
positives. ConMem has reasonable overhead suitable for
development usage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "concurrency bugs; software testing",
}
@Article{Mesa-Martinez:2010:CPT,
author = "Francisco Javier Mesa-Martinez and Ehsan K. Ardestani
and Jose Renau",
title = "Characterizing processor thermal behavior",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "193--204",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736043",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Temperature is a dominant factor in the performance,
reliability, and leakage power consumption of modern
processors. As a result, increasing numbers of
researchers evaluate thermal characteristics in their
proposals. In this paper, we measure a real processor,
focusing on its thermal characterization while it
executes diverse workloads.\par
Our results show that in real designs, thermal
transients operate at larger scales than their
performance and power counterparts. Conventional
thermal simulation methodologies based on profile-based
simulation or statistical sampling, such as SimPoint,
tend to explore very limited execution spans. Short
simulation times can lead to reduced matchings between
performance and thermal phases. To illustrate these
issues we characterize and classify from a thermal
standpoint SPEC00 and SPEC06 applications, which are
traditionally used in the evaluation of architectural
proposals. This paper concludes with a list of
recommendations regarding thermal modeling
considerations based on our experimental insights.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "microarchitecture; temperature; thermal simulation",
}
@Article{Venkatesh:2010:CCR,
author = "Ganesh Venkatesh and Jack Sampson and Nathan Goulding
and Saturnino Garcia and Vladyslav Bryksin and Jose
Lugo-Martinez and Steven Swanson and Michael Bedford
Taylor",
title = "Conservation cores: reducing the energy of mature
computations",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "205--218",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736044",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Growing transistor counts, limited power budgets, and
the breakdown of voltage scaling are currently
conspiring to create a {\em utilization wall\/} that
limits the fraction of a chip that can run at full
speed at one time. In this regime, specialized,
energy-efficient processors can increase parallelism by
reducing the per-computation power requirements and
allowing more computations to execute under the same
power budget. To pursue this goal, this paper
introduces {\em conservation cores}. Conservation
cores, or {\em c-cores}, are specialized processors
that focus on reducing energy and energy-delay instead
of increasing performance. This focus on energy makes
c-cores an excellent match for many applications that
would be poor candidates for hardware acceleration
(e.g., irregular integer codes). We present a toolchain
for automatically synthesizing c-cores from application
source code and demonstrate that they can significantly
reduce energy and energy-delay for a wide range of
applications. The c-cores support patching, a form of
targeted reconfigurability, that allows them to adapt
to new versions of the software they target. Our
results show that conservation cores can reduce energy
consumption by up to 16.0x for functions and by up to
2.1x for whole applications, while patching can extend
the useful lifetime of individual c-cores to match that
of conventional processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "conservation core; heterogeneous many-core; patching;
utilization wall",
}
@Article{Sudan:2010:MPI,
author = "Kshitij Sudan and Niladrish Chatterjee and David
Nellans and Manu Awasthi and Rajeev Balasubramonian and
Al Davis",
title = "Micro-pages: increasing {DRAM} efficiency with
locality-aware data placement",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "219--230",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736045",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Power consumption and DRAM latencies are serious
concerns in modern chip-multiprocessor (CMP or
multi-core) based compute systems. The management of
the DRAM row buffer can significantly impact both power
consumption and latency. Modern DRAM systems read data
from cell arrays and populate a row buffer as large as
8 KB on a memory request. But only a small fraction of
these bits are ever returned to the CPU. This ends
up wasting energy and time reading (and subsequently
writing back) bits that are rarely used. Traditionally,
an open-page policy has been used for uni-processor
systems and it has worked well because of spatial and
temporal locality in the access stream. In future
multi-core processors, the possibly independent access
streams of each core are interleaved, thus destroying
the available locality and significantly
under-utilizing the contents of the row buffer. In this
work, we attempt to improve row-buffer utilization for
future multi-core systems.\par
The schemes presented here are motivated by our
observations that a large number of accesses within
heavily accessed OS pages are to small, contiguous
'chunks' of cache blocks. Thus, the co-location of
chunks (from different OS pages) in a row-buffer will
improve the overall utilization of the row buffer
contents, and consequently reduce memory energy
consumption and access time. Such co-location can be
achieved in many ways, notably involving a reduction in
OS page size and software or hardware assisted
migration of data within DRAM. We explore these
mechanisms and discuss the trade-offs involved along
with energy and performance improvements from each
scheme. On average, for applications with room for
improvement, our best performing scheme increases
performance by 9\% (max. 18\%) and reduces memory
energy consumption by 15\% (max. 70\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "data placement; dram row-buffer management",
}
@Article{Pelley:2010:PRD,
author = "Steven Pelley and David Meisner and Pooya Zandevakili
and Thomas F. Wenisch and Jack Underwood",
title = "Power routing: dynamic power provisioning in the data
center",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "231--242",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735971.1736047",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data center power infrastructure incurs massive
capital costs, which typically exceed energy costs over
the life of the facility. To squeeze maximum value from
the infrastructure, researchers have proposed
over-subscribing power circuits, relying on the
observation that peak loads are rare. To ensure
availability, these proposals employ power capping,
which throttles server performance during utilization
spikes to enforce safe power budgets. However, because
budgets must be enforced locally -- at each power
distribution unit (PDU) -- local utilization spikes may
force throttling even when power delivery capacity is
available elsewhere. Moreover, the need to maintain
reserve capacity for fault tolerance on power delivery
paths magnifies the impact of utilization
spikes.\par
In this paper, we develop mechanisms to better utilize
installed power infrastructure, reducing reserve
capacity margins and avoiding performance throttling.
Unlike conventional high-availability data centers,
where collocated servers share identical primary and
secondary power feeds, we first reorganize power feeds to
create shuffled power distribution topologies. Shuffled
topologies spread secondary power feeds over numerous
PDUs, reducing reserve capacity requirements to
tolerate a single PDU failure. Second, we propose Power
Routing, which schedules IT load dynamically across
redundant power feeds to: (1) shift slack to servers
with growing power demands, and (2) balance power draw
across AC phases to reduce heating and improve
electrical stability. We describe efficient heuristics
for scheduling servers to PDUs (an NP-complete
problem). Using data collected from nearly 1000 servers
in three production facilities, we demonstrate that
these mechanisms can reduce the required power
infrastructure capacity relative to conventional
high-availability data centers by 32\% without
performance degradation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "data centers; power infrastructure",
}
@Article{Ahmad:2010:JOI,
author = "Faraz Ahmad and T. N. Vijaykumar",
title = "Joint optimization of idle and cooling power in data
centers while maintaining response time",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "243--256",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735971.1736048",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Server power and cooling power amount to a significant
fraction of modern data centers' recurring costs. While
data centers provision enough servers to guarantee
response times under the maximum loading, data centers
operate under much less loading most of the time
(e.g., 30-70\% of the maximum loading). Previous
server-power proposals exploit this under-utilization
to reduce the server idle power by keeping active only
as many servers as necessary and putting the rest into
low-power standby modes. However, these proposals incur
higher cooling power due to hot spots created by
concentrating the data center loading on fewer active
servers, or degrade response times due to
standby-to-active transition delays, or both. Other
proposals optimize the cooling power but incur
considerable idle power. To address the first issue of
power, we propose {\em PowerTrade}, which trades-off
idle power and cooling power for each other, thereby
reducing the total power. To address the second issue
of response time, we propose {\em SurgeGuard\/} to
overprovision the number of active servers beyond that
needed by the current loading so as to absorb future
increases in the loading. SurgeGuard is a two-tier
scheme which uses well-known over-provisioning at
coarse time granularities (e.g., one hour) to absorb
the common, smooth increases in the loading, and a
novel fine-grain replenishment of the over-provisioned
reserves at fine time granularities (e.g., five
minutes) to handle the uncommon, abrupt loading surges.
Using real-world traces, we show that combining
PowerTrade and SurgeGuard reduces total power by 30\%
compared to previous low-power schemes while
maintaining response times within 1.7\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "cooling power; data center; idle power; power
management; response time",
}
@Article{Goodstein:2010:BAA,
author = "Michelle L. Goodstein and Evangelos Vlachos and Shimin
Chen and Phillip B. Gibbons and Michael A. Kozuch and
Todd C. Mowry",
title = "Butterfly analysis: adapting dataflow analysis to
dynamic parallel monitoring",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "257--270",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735971.1736050",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Online program monitoring is an effective technique
for detecting bugs and security attacks in running
applications. Extending these tools to monitor parallel
programs is challenging because the tools must account
for inter-thread dependences and relaxed memory
consistency models. Existing tools assume sequential
consistency and often slow down the monitored program
by orders of magnitude. In this paper, we present a
novel approach that avoids these pitfalls by not
relying on strong consistency models or detailed
inter-thread dependence tracking. Instead, we only
assume that events in the distant past on all threads
have become visible; we make no assumptions on (and
avoid the overheads of tracking) the relative ordering
of more recent events on other threads. To overcome the
potential state explosion of considering all the
possible orderings among recent events, we adapt two
techniques from static dataflow analysis, reaching
definitions and reaching expressions, to this new
domain of dynamic parallel monitoring. Significant
modifications to these techniques are proposed to
ensure the correctness and efficiency of our approach.
We show how our adapted analysis can be used in two
popular memory and security tools. We prove that our
approach does not miss errors, and sacrifices precision
only due to the lack of a relative ordering among
recent events. Moreover, our simulation study on a
collection of Splash-2 and Parsec 2.0 benchmarks
running a memory-checking tool on a hardware-assisted
logging platform demonstrates the potential benefits in
trading off a very low false positive rate for (i)
reduced overhead and (ii) the ability to run on relaxed
consistency models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "data flow analysis; dynamic program monitoring;
parallel programming; static analysis",
}
@Article{Vlachos:2010:PEA,
author = "Evangelos Vlachos and Michelle L. Goodstein and
Michael A. Kozuch and Shimin Chen and Babak Falsafi and
Phillip B. Gibbons and Todd C. Mowry",
title = "{ParaLog}: enabling and accelerating online parallel
monitoring of multithreaded applications",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "271--284",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736051",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "{\em Instruction-grain lifeguards\/} monitor the
events of a running application at the level of
individual instructions in order to identify and help
mitigate application bugs and security exploits.
Because such lifeguards impose a 10-100X slowdown on
existing platforms, previous studies have proposed
hardware designs to accelerate lifeguard processing.
However, these accelerators are either tailored to a
specific class of lifeguards or suitable only for
monitoring single-threaded programs.\par
We present ParaLog, the first design of a system
enabling fast online parallel monitoring of
multithreaded parallel applications. ParaLog supports a
broad class of software-defined lifeguards. We show how
three existing accelerators can be enhanced to support
online multithreaded monitoring, dramatically reducing
lifeguard overheads. We identify and solve several
challenges in monitoring parallel applications and/or
parallelizing these accelerators, including (i)
enforcing inter-thread data dependences, (ii) dealing
with inter-thread effects that are not reflected in
coherence traffic, (iii) dealing with unmonitored
operating system activity, and (iv) ensuring lifeguards
can access shared metadata with negligible
synchronization overheads. We present our system design
for both Sequentially Consistent and Total Store
Ordering processors. We implement and evaluate our
design on a simulated 16-core CMP, using benchmarks
from SPLASH-2 and PARSEC and two lifeguards: a
data-flow tracking lifeguard and a memory-access
checker lifeguard. Our results show that (i) our
parallel accelerators improve performance by 2-9X and
1.13-3.4X for our two lifeguards, respectively, (ii) we
are 5-126X faster than the time-slicing approach
required by existing techniques, and (iii) our average
overheads for applications with eight threads are 51\%
and 28\% for the two lifeguards, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "hardware support for debugging; instruction-grain
lifeguards; online parallel monitoring",
}
@Article{Hormati:2010:MMS,
author = "Amir H. Hormati and Yoonseo Choi and Mark Woh and
Manjunath Kudlur and Rodric Rabbah and Trevor Mudge and
Scott Mahlke",
title = "{MacroSS}: macro-{SIMD}ization of streaming
applications",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "285--296",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736053",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "SIMD (Single Instruction, Multiple Data) engines are
an essential part of the processors in various
computing markets, from servers to the embedded domain.
Although SIMD-enabled architectures have the capability
of boosting the performance of many application domains
by exploiting data-level parallelism, it is very
challenging for compilers and also programmers to
identify and transform parts of a program that will
benefit from a particular SIMD engine. The focus of
this paper is on the problem of SIMDization for the
growing application domain of streaming. Streaming
applications are an ideal solution for targeting
multi-core architectures, such as shared/distributed
memory systems, tiled architectures, and single-core
systems. Since these architectures, in most cases,
provide SIMD acceleration units as well, it is highly
beneficial to generate SIMD code from streaming
programs. Specifically, we introduce MacroSS, which is
capable of performing macro-SIMDization on high-level
streaming graphs. Macro-SIMDization uses high-level
information such as execution rates of actors and
communication patterns between them to transform the
graph structure, vectorize actors of a streaming
program, and generate intermediate code. We also
propose low-overhead architectural modifications that
accelerate shuffling of data elements between the
scalar and vectorized parts of a streaming program. Our
experiments show that MacroSS is capable of generating
code that, on average, outperforms scalar code compiled
with the current state-of-the-art auto-vectorizing
compilers by 54\%. Using the low-overhead data
shuffling hardware, performance is improved by an
additional 8\% with less than 1\% area overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compiler; optimization; SIMD architecture; streaming",
}
@Article{Woo:2010:CPD,
author = "Dong Hyuk Woo and Hsien-Hsin S. Lee",
title = "{COMPASS}: a programmable data prefetcher using idle
{GPU} shaders",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "297--310",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735971.1736054",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A traditional fixed-function graphics accelerator has
evolved into a programmable general-purpose graphics
processing unit over the last few years. These powerful
computing cores are mainly used for accelerating
graphics applications or enabling low-cost scientific
computing. To further reduce the cost and form factor,
an emerging trend is to integrate the GPU along with the
memory controllers onto the same die with the processor
cores. However, given such a system-on-chip, the GPU,
while occupying a substantial part of the silicon, will
sit idle and contribute nothing to the overall system
performance when running non-graphics workloads or
applications that lack data-level parallelism. In this
paper, we propose COMPASS, a compute shader-assisted
data prefetching scheme, to leverage the GPU resource
for improving single-threaded performance on an
integrated system. By harnessing the GPU shader cores
with very lightweight architectural support, COMPASS
can emulate the functionality of a hardware-based
prefetcher using the idle GPU and successfully improve
the memory performance of single-thread applications.
Moreover, thanks to its flexibility and
programmability, one can implement the best performing
prefetch scheme to improve each specific application as
demonstrated in this paper. With COMPASS, we envision
that a future application vendor can provide a
custom-designed COMPASS shader bundled with its
software to be loaded at runtime to optimize the
performance. Our simulation results show that COMPASS
can improve the single-thread performance of
memory-intensive applications by 68\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compute shader; GPU; prefetch",
}
@Article{Sanchez:2010:FAS,
author = "Daniel Sanchez and Richard M. Yoo and Christos
Kozyrakis",
title = "Flexible architectural support for fine-grain
scheduling",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "311--322",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736055",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To make efficient use of CMPs with tens to hundreds of
cores, it is often necessary to exploit fine-grain
parallelism. However, managing tasks of a few thousand
instructions is particularly challenging, as the
runtime must ensure load balance without compromising
locality while keeping overheads small. Software-only
schedulers can implement various scheduling algorithms
that match the characteristics of different
applications and programming models, but suffer
significant overheads as they synchronize and
communicate task information over the deep cache
hierarchy of a large-scale CMP. To reduce these costs,
hardware-only schedulers like Carbon, which implement
task queuing and scheduling in hardware, have been
proposed. However, a hardware-only solution fixes the
scheduling algorithm and leaves no room for other uses
of the custom hardware.\par
This paper presents a combined hardware-software
approach to build fine-grain schedulers that retain the
flexibility of software schedulers while being as fast
and scalable as hardware ones. We propose asynchronous
direct messages (ADM), a simple architectural extension
that provides direct exchange of asynchronous, short
messages between threads in the CMP without going
through the memory hierarchy. ADM is sufficient to
implement a family of novel, software-mostly schedulers
that rely on low-overhead messaging to efficiently
coordinate scheduling and transfer task information.
These schedulers match and often exceed the performance
and scalability of Carbon when using the same
scheduling algorithm. When the ADM runtime tailors its
scheduling algorithm to application characteristics, it
outperforms Carbon by up to 70\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "chip-multiprocessors; fine-grain scheduling;
many-core; messaging; scheduling; work-stealing",
}
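ADM itself is an architectural extension; as a rough software analogy only (Python queues standing in for the hardware message channels, and a central dispatcher standing in for one of the paper's software-mostly schedulers), message-coordinated task handout can be sketched like this:

# Toy analogy of message-coordinated scheduling: workers request work
# via short explicit messages instead of synchronizing on a shared
# task queue through the memory hierarchy. Queues stand in for the
# ADM channels; this illustrates the idea, not the hardware design.
import queue, threading

NUM_WORKERS = 3
to_sched = queue.Queue()                             # messages to the scheduler
inbox = [queue.Queue() for _ in range(NUM_WORKERS)]  # per-worker channel

def worker(wid):
    while True:
        to_sched.put(("idle", wid))        # short message: ready for work
        task = inbox[wid].get()
        if task is None:                   # sentinel: no work remains
            return
        print("worker", wid, "ran task", task)

def scheduler(tasks):
    pending, done = list(tasks), 0
    while done < NUM_WORKERS:
        msg, wid = to_sched.get()
        if pending:
            inbox[wid].put(pending.pop())
        else:
            inbox[wid].put(None)
            done += 1

threads = [threading.Thread(target=worker, args=(w,)) for w in range(NUM_WORKERS)]
for t in threads: t.start()
scheduler(range(10))
for t in threads: t.join()

The point of the analogy is only that scheduling decisions travel as short messages between threads rather than through shared-memory queues.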
@Article{Romanescu:2010:SDV,
author = "Bogdan F. Romanescu and Alvin R. Lebeck and Daniel J.
Sorin",
title = "Specifying and dynamically verifying address
translation-aware memory consistency",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "323--334",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736057",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computer systems with virtual memory are susceptible
to design bugs and runtime faults in their address
translation (AT) systems. Detecting bugs and faults
requires a clear specification of correct behavior. To
address this need, we develop a framework for AT-aware
memory consistency models. We expand and divide memory
consistency into the physical address memory
consistency (PAMC) model that defines the behavior of
operations on physical addresses and the virtual
address memory consistency (VAMC) model that defines
the behavior of operations on virtual addresses. As
part of this expansion, we show what AT features are
required to bridge the gap between PAMC and VAMC. Based
on our AT-aware memory consistency specifications, we
design efficient dynamic verification hardware that can
detect violations of VAMC and thus detect the effects
of design bugs and runtime faults, including most
AT-related bugs in published errata.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "address translation; dynamic verification; memory
consistency; virtual memory",
}
@Article{Ebrahimi:2010:FST,
author = "Eiman Ebrahimi and Chang Joo Lee and Onur Mutlu and
Yale N. Patt",
title = "Fairness via source throttling: a configurable and
high-performance fairness substrate for multi-core
memory systems",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "335--346",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736058",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cores in a chip-multiprocessor (CMP) system share
multiple hardware resources in the memory subsystem. If
resource sharing is unfair, some applications can be
delayed significantly while others are unfairly
prioritized. Previous research proposed separate
fairness mechanisms in each individual resource. Such
resource-based fairness mechanisms implemented
independently in each resource can make contradictory
decisions, leading to low fairness and loss of
performance. Therefore, a coordinated mechanism that
provides fairness in the entire shared memory system is
desirable.\par
This paper proposes a new approach that provides
fairness in the {\em entire shared memory system},
thereby eliminating the need for and complexity of
developing fairness mechanisms for each individual
resource. Our technique, Fairness via Source Throttling
(FST), estimates the unfairness in the entire shared
memory system. If the estimated unfairness is above a
threshold set by system software, FST throttles down
cores causing unfairness by limiting the number of
requests they can inject into the system and the
frequency at which they do. As such, our {\em
source-based\/} fairness control ensures fairness
decisions are made in tandem in the entire memory
system. FST also enforces thread priorities/weights,
and enables system software to enforce different
fairness objectives and fairness-performance tradeoffs
in the memory system.\par
Our evaluations show that FST provides the best system
fairness and performance compared to four systems with
no fairness control and with state-of-the-art fairness
mechanisms implemented in both shared caches and memory
controllers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "fairness; multi-core systems; shared memory systems;
system performance",
}
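The FST policy details are in the paper itself; the following is only a toy Python rendering of the control loop the abstract describes, with slowdown estimates, threshold, and throttling step all hypothetical.

# Toy rendering of source throttling: estimate unfairness as the ratio
# of the largest to the smallest per-core slowdown and, above a
# software-set threshold, cut the request-injection budget of the core
# that is slowed least (a crude proxy for "causing interference").
# Values and policy knobs are hypothetical, not the paper's.
def throttle_step(slowdowns, budgets, threshold=1.4, step=0.5):
    unfairness = max(slowdowns) / min(slowdowns)
    if unfairness > threshold:
        culprit = slowdowns.index(min(slowdowns))
        budgets[culprit] = max(0.1, budgets[culprit] * step)
    return unfairness

budgets = [1.0, 1.0, 1.0, 1.0]
print(throttle_step([3.2, 1.1, 1.3, 1.2], budgets), budgets)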
@Article{Gelado:2010:ADS,
author = "Isaac Gelado and Javier Cabezas and Nacho Navarro and
John E. Stone and Sanjay Patel and Wen-mei W. Hwu",
title = "An asymmetric distributed shared memory model for
heterogeneous parallel systems",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "347--358",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736059",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Heterogeneous computing combines general purpose CPUs
with accelerators to efficiently execute both
sequential control-intensive and data-parallel phases
of applications. Existing programming models for
heterogeneous computing rely on programmers to
explicitly manage data transfers between the CPU system
memory and accelerator memory.\par
This paper presents a new programming model for
heterogeneous computing, called Asymmetric Distributed
Shared Memory (ADSM), that maintains a shared logical
memory space for CPUs to access objects in the
accelerator physical memory but not vice versa. The
asymmetry allows light-weight implementations that
avoid common pitfalls of symmetrical distributed shared
memory systems. ADSM allows programmers to assign data
objects to performance critical methods. When a method
is selected for accelerator execution, its associated
data objects are allocated within the shared logical
memory space, which is hosted in the accelerator
physical memory and transparently accessible by the
methods executed on CPUs.\par
We argue that ADSM reduces programming efforts for
heterogeneous computing systems and enhances
application portability. We present a software
implementation of ADSM, called GMAC, on top of CUDA in
a GNU/Linux environment. We show that applications
written in ADSM and running on top of GMAC achieve
performance comparable to their counterparts using
programmer-managed data transfers. This paper presents
the GMAC system and evaluates different design choices.
We further suggest additional architectural support
that will likely allow GMAC to achieve higher
application performance than the current CUDA model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "asymmetric distributed shared memory; data-centric
programming models; heterogeneous systems",
}
@Article{Bhattacharjee:2010:ICC,
author = "Abhishek Bhattacharjee and Margaret Martonosi",
title = "Inter-core cooperative {TLB} for chip
multiprocessors",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "359--370",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736060",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Translation Lookaside Buffers (TLBs) are commonly
employed in modern processor designs and have
considerable impact on overall system performance. A
number of past works have studied TLB designs to lower
access times and miss rates, specifically for
uniprocessors. With the growing dominance of chip
multiprocessors (CMPs), it is necessary to examine TLB
performance in the context of parallel
workloads.\par
This work is the first to present TLB prefetchers that
exploit commonality in TLB miss patterns across cores
in CMPs. We propose and evaluate two Inter-Core
Cooperative (ICC) TLB prefetching mechanisms, assessing
their effectiveness at eliminating TLB misses both
individually and together. Our results show these
approaches require at most modest hardware and can
collectively eliminate 19\% to 90\% of data TLB (D-TLB)
misses across the surveyed parallel workloads.\par
We also compare performance improvements across a range
of hardware and software implementation possibilities.
We find that while a fully-hardware implementation
results in average performance improvements of 8-46\%
for a range of TLB sizes, a hardware/software approach
yields improvements of 4-32\%. Overall, our work shows
that TLB prefetchers exploiting inter-core correlations
can effectively eliminate TLB misses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "parallelism; prefetching; translation lookaside
buffer",
}
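As a software toy of the cooperative idea (the paper's mechanisms are hardware prefetchers; every structure below is hypothetical), one core's TLB fill can seed small per-core prefetch buffers so that peers touching the same page later skip the page-table walk:

# Toy model of inter-core cooperative TLB prefetching: when one core
# misses on a virtual page, the translation is pushed into the other
# cores' small prefetch buffers. Purely illustrative.
from collections import OrderedDict

NUM_CORES, PB_ENTRIES = 4, 16
tlb = [dict() for _ in range(NUM_CORES)]
prefetch_buf = [OrderedDict() for _ in range(NUM_CORES)]

def walk_page_table(vpage):
    return vpage + 0x1000                 # stand-in for a real translation

def access(core, vpage):
    if vpage in tlb[core]:
        return "tlb hit"
    if vpage in prefetch_buf[core]:       # saved walk: cooperative hit
        tlb[core][vpage] = prefetch_buf[core].pop(vpage)
        return "prefetch hit"
    tlb[core][vpage] = walk_page_table(vpage)   # miss: do the walk...
    for other in range(NUM_CORES):              # ...and share it with peers
        if other != core:
            prefetch_buf[other][vpage] = tlb[core][vpage]
            if len(prefetch_buf[other]) > PB_ENTRIES:
                prefetch_buf[other].popitem(last=False)   # evict oldest
    return "miss"

print(access(0, 42), access(1, 42), access(1, 42))   # miss, prefetch hit, tlb hit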
@Article{Huang:2010:OES,
author = "Ruirui Huang and Daniel Y. Deng and G. Edward Suh",
title = "{Orthrus}: efficient software integrity protection on
multi-cores",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "371--384",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736062",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper proposes an efficient hardware/software
system that significantly enhances software security
through diversified replication on multi-cores. Recent
studies show that a large class of software attacks can
be detected by running multiple versions of a program
simultaneously and checking the consistency of their
behaviors. However, execution of multiple replicas
incurs significant overheads on today's computing
platforms, especially with fine-grained comparisons
necessary for high security. Orthrus exploits
similarities in automatically generated replicas to
enable simultaneous execution of those replicas with
minimal overheads; the architecture reduces memory and
bandwidth overheads by compressing multiple memory
spaces together, and additional power consumption and
silicon area by eliminating redundant computations.
Utilizing the hardware architecture, Orthrus implements
a fine-grained memory layout diversification with the
LLVM compiler and can detect corruptions in both
pointers and critical data. Experiments indicate that
the Orthrus architecture incurs minimal overheads and
provides protection against a broad range of
attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "memory protection; multi-core architecture;
replication-aware architecture; software diversity and
redundancy; software security",
}
@Article{Feng:2010:SPS,
author = "Shuguang Feng and Shantanu Gupta and Amin Ansari and
Scott Mahlke",
title = "Shoestring: probabilistic soft error reliability on
the cheap",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "385--396",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736063",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Aggressive technology scaling provides designers with
an ever increasing budget of cheaper and faster
transistors. Unfortunately, this trend is accompanied
by a decline in individual device reliability as
transistors become increasingly susceptible to soft
errors. We are quickly approaching a new era where
resilience to soft errors is no longer a luxury that
can be reserved for just processors in
high-reliability, mission-critical domains. Even
processors used in mainstream computing will soon
require protection. However, due to tighter profit
margins, reliable operation for these devices must come
at little or no cost. This paper presents Shoestring, a
minimally invasive software solution that provides high
soft error coverage with very little overhead, enabling
its deployment even in commodity processors with
'shoestring' reliability budgets. Leveraging
intelligent analysis at compile time, and exploiting
low-cost, symptom-based error detection, Shoestring is
able to focus its efforts on protecting
statistically-vulnerable portions of program code.
Shoestring effectively applies instruction duplication
to protect only those segments of code that, when
subjected to a soft error, are likely to result in
user-visible faults without first exhibiting
symptomatic behavior. Shoestring is able to recover
from an additional 33.9\% of soft errors that are
undetected by a symptom-only approach, achieving an
overall user-visible failure rate of 1.6\%. This
reliability improvement comes at a modest performance
overhead of 15.8\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compiler analysis; error detection; fault injection",
}
@Article{Yoon:2010:VFE,
author = "Doe Hyun Yoon and Mattan Erez",
title = "Virtualized and flexible {ECC} for main memory",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "397--408",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1736020.1736064",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a general scheme for virtualizing main
memory error-correction mechanisms, which map redundant
information needed to correct errors into the memory
namespace itself. We rely on this basic idea, which
increases flexibility, to increase error protection
capabilities, improve power efficiency, and reduce
system cost, with only small performance overheads. We
augment the virtual memory system architecture to
detach the physical mapping of data from the physical
mapping of its associated ECC information. We then use
this mechanism to develop two-tiered error protection
techniques that separate the process of detecting
errors from the rare need to also correct errors, and
thus save energy. We describe how to provide strong
chipkill and double-chip kill protection using existing
DRAM and packaging technology. We show how to maintain
access granularity and redundancy overheads, even when
using $\times 8$ DRAM chips. We also evaluate error
correction for systems that do not use ECC DIMMs.
Overall, analysis of demanding SPEC CPU 2006 and PARSEC
benchmarks indicates that performance overhead is only
1\% with ECC DIMMs and less than 10\% using standard
non-ECC DIMM configurations, that DRAM power savings
can be as high as 27\%, and that the system
energy-delay product is improved by 12\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "error correction; fault tolerance; memory systems;
reliability",
}
@Article{Li:2010:AAB,
author = "Minming Li and Chun Jason Xue and Tiantian Liu and
Yingchao Zhao",
title = "Analysis and approximation for bank selection
instruction minimization on partitioned memory
architecture",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "1--8",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755888.1755890",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A large number of embedded systems include 8-bit
microcontrollers for their energy efficiency and low
cost. Multi-bank memory architecture is commonly
applied in 8-bit microcontrollers to increase the size
of memory without extending address buses. To switch
among different memory banks, a special instruction,
Bank Selection, is used. How to minimize the number of
bank selection instructions inserted is important to
reduce code size for embedded systems.\par
In this paper, we consider how to insert the minimum
number of bank selection instructions in a program to
achieve feasibility. A program can be represented by a
control flow graph (CFG). We prove that it is NP-hard
to insert the minimum number of bank selection
instructions if all the variables are pre-assigned to
memory banks. Therefore, we introduce a 2-approximation
algorithm using a rounding method. When the CFG is a
tree or the out-degree of each node in the CFG is at
most two, we show that we can insert the bank selection
instructions optimally in polynomial time. We then
consider the case when there are some nodes that do not
access any memory bank and design a dynamic programming
method to compute the optimal insertion strategy when
the CFG is a tree. Experimental results show that the
proposed techniques can reduce bank selection
instructions significantly on partitioned memory
architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "bank selection instruction minimization; partitioned
memory architecture",
}
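For the tree-CFG case that the abstract says is solvable optimally in polynomial time, the flavor of such a dynamic program can be sketched as follows; this is a simplification that assumes every node accesses exactly one pre-assigned bank (the paper also treats nodes with no access), and the CFG below is invented.

# Sketch of optimal bank-selection insertion on a tree CFG. dp(node,
# in_bank) = fewest bank-selection instructions for the subtree rooted
# at 'node' when 'in_bank' is the active bank on entry.
banks    = {"a": 0, "b": 1, "c": 0, "d": 1, "e": 1}   # node -> assigned bank
children = {"a": ["b", "c"], "b": [], "c": ["d", "e"], "d": [], "e": []}
memo = {}

def dp(node, in_bank):
    if (node, in_bank) not in memo:
        need = banks[node]
        switch = 0 if in_bank == need else 1   # insert a switch here?
        # After this node executes, 'need' is active for every child.
        memo[(node, in_bank)] = switch + sum(dp(c, need)
                                             for c in children[node])
    return memo[(node, in_bank)]

print(dp("a", 0))   # minimal insertions with bank 0 active at the root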
@Article{Pyka:2010:VSL,
author = "Robert Pyka and Felipe Klein and Peter Marwedel and
Stylianos Mamagkakis",
title = "Versatile system-level memory-aware platform
description approach for embedded {MPSoCs}",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "9--16",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755888.1755891",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we present a novel system modeling
language which targets primarily the development of
source-level multiprocessor memory aware
optimizations.\par
In contrast to previous system modeling approaches, this
approach tries to model the whole system and especially
the memory hierarchy in a structural and semantically
accessible way. Previous approaches primarily support
generation of simulators or retargetable code selectors
and thus concentrate on pure behavioral models or
describe only the processor instruction set in a
semantically accessible way. A simple, database-like
interface is offered to the optimization developer,
which in conjunction with the MACCv2 framework enables
rapid development of source-level architecture
independent optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "architecture description; channel; component;
configuration; definition; energy models; framework",
}
@Article{Kim:2010:ODM,
author = "Yongjoo Kim and Jongeun Lee and Aviral Shrivastava and
Yunheung Paek",
title = "Operation and data mapping for {CGRAs} with multi-bank
memory",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "17--26",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755892",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Coarse Grain Reconfigurable Architectures (CGRAs)
promise high performance at high power efficiency. They
fulfill this promise by keeping the hardware extremely
simple, and moving the complexity to application
mapping. One major challenge comes in the form of data
mapping. For reasons of power-efficiency and
complexity, CGRAs use multi-bank local memory, and a
row of PEs share memory access. In order for each row
of the PEs to access any memory bank, there is a
hardware arbiter between the memory requests generated
by the PEs and the banks of the local memory. However,
a fundamental restriction remains that a bank cannot be
accessed by two different PEs at the same time. We
propose to meet this challenge by mapping application
operations onto PEs and data into memory banks in a way
that avoids such conflicts. Our experimental results on
kernels from multimedia benchmarks demonstrate that our
local memory-aware compilation approach can generate
mappings that are up to 40\% better in performance
(17.3\% on average) compared to a memory-unaware
scheduler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "arbiter; bank conflict; coarse-grained reconfigurable
architecture; compilation; multi-bank memory",
}
@Article{Foroozannejad:2010:LDB,
author = "Mohammad H. Foroozannejad and Matin Hashemi and Trevor
L. Hodges and Soheil Ghiasi",
title = "Look into details: the benefits of fine-grain
streaming buffer analysis",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "27--36",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755894",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many embedded applications demand processing of a
seemingly endless stream of input data in real-time.
Productive development of such applications is
typically carried out by synthesizing software from
high-level specifications, such as data-flow graphs. In
this context, we study the problem of inter-actor
buffer allocation, which is a critical step during
compilation of streaming applications. We argue that
fine-grain analysis of buffers' spatio-temporal
characteristics, as opposed to conventional live range
analysis, enables dramatic improvements in buffer
sharing. Improved sharing translates to reduction of
the compiled binary memory footprint, which is of prime
concern in many embedded systems. We transform the
buffer allocation problem to two-dimensional packing
using complex polygons. We develop an evolutionary
packing algorithm, which readily yields buffer
allocations. Experimental results show an average of
over 7X and 2X improvement in total buffer size,
compared to baseline and conventional live range
analysis schemes, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "buffer management; optimization; software synthesis;
streaming applications; synchronous data flow",
}
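By way of contrast with the fine-grain spatio-temporal packing described above, the conventional live-range baseline it improves on can be sketched like this; the ranges and sizes are invented and the reuse rule is deliberately naive.

# Naive live-range buffer sharing, the baseline that fine-grain
# analysis beats: two buffers may share space only if one is dead
# before the other is born. The paper instead packs buffers as complex
# polygons in (time, address) space. All numbers are hypothetical.
buffers = {"ab": (0, 6, 128), "bc": (4, 9, 64), "cd": (6, 12, 128)}

def allocate(bufs):
    offsets, used, watermark = {}, set(), 0
    for name, (start, end, size) in sorted(bufs.items(), key=lambda b: b[1]):
        for other, (s2, e2, sz2) in bufs.items():
            # Hand out a dead buffer's slot at most once, if big enough.
            if (other in offsets and other not in used
                    and e2 <= start and sz2 >= size):
                offsets[name] = offsets[other]
                used.add(other)
                break
        else:
            offsets[name] = watermark
            watermark += size
    return offsets, watermark

print(allocate(buffers))   # footprint 192 bytes vs. 320 without sharing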
@Article{Perathoner:2010:MSE,
author = "Simon Perathoner and Tobias Rein and Lothar Thiele and
Kai Lampka and Jonas Rox",
title = "Modeling structured event streams in system level
performance analysis",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "37--46",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755895",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper extends the methodology of analytic
real-time analysis of distributed embedded systems
towards merging and extracting sub-streams based on
event type information. For example, one may first
merge a set of given event streams, then process them
jointly and finally decompose them into separate
streams again. In other words, data streams can be
hierarchically composed into higher level event streams
and decomposed again later. The proposed technique
is strictly compositional, hence highly suited for
being embedded into well known performance evaluation
frameworks such as SymTA/S and MPA (Modular Performance
Analysis). It is based on a novel characterization of
structured event streams which we denote as Event Count
Curves. They characterize the structure of event
streams in which the individual events belong to a
finite number of classes. This new concept avoids the
explicit maintenance of stream-individual information
when routing a composed stream through a network of
system components. Nevertheless it allows an arbitrary
composition and decomposition of sub-streams at any
stage of the distributed event processing. For
evaluating our approach we analyze a realistic
case study and compare the obtained results with other
existing techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "event count curves; performance analysis; real-time
calculus",
}
@Article{Brandt:2010:TCA,
author = "Jens Brandt and Klaus Schneider and Sandeep K.
Shukla",
title = "Translating concurrent action oriented specifications
to synchronous guarded actions",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "47--56",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755888.1755896",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent Action-Oriented Specifications (CAOS) model
the behavior of a synchronous hardware circuit as
asynchronous guarded actions at an abstraction level
higher than the Register Transfer Level (RTL). Previous
approaches always considered the compilation of CAOS,
which includes a transformation of the underlying
model of computation and the scheduling of guarded
actions per clock cycle, as a tightly integrated step.
In this paper, we present a new compilation procedure,
which separates these two tasks and translates CAOS
models to synchronous guarded actions with an explicit
interface to a scheduler. This separation of concerns
has many advantages, including better analyses and
integration of custom schedulers. Our method also
generates assertions that each scheduler must obey, which
can be fulfilled by algorithms for scheduler synthesis
like those developed in supervisory control. We present
our translation procedure in detail and illustrate it
by various examples. We also show that our method
simplifies formal verification of hardware synthesized
from CAOS specifications over previously known formal
verification approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "code generation; concurrent action-oriented
specifications; guarded commands; synchronous
languages",
}
@Article{Delaval:2010:CMD,
author = "Gwena{\"e}l Delaval and Herv{\'e} Marchand and Eric
Rutten",
title = "Contracts for modular discrete controller synthesis",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "57--66",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755898",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe the extension of a reactive programming
language with a behavioral contract construct. It is
dedicated to the programming of reactive control of
applications in embedded systems, and involves
principles of the supervisory control of discrete event
systems. Our contribution is in a language approach
where modular discrete controller synthesis (DCS) is
integrated, and it is concretized in the encapsulation
of DCS into a compilation process. From transition
system specifications of possible behaviors, DCS
automatically produces controllers that make the
controlled system satisfy the property given as
objective. Our language features and compiling
technique provide correctness-by-construction in that
sense, and enhance reliability and verifiability. Our
application domain is adaptive and reconfigurable
systems: closed-loop adaptation mechanisms enable
flexible execution of functionalities w.r.t. changing
resource and environment conditions. Our language can
serve to program such adaptation controllers. This paper
particularly describes the compilation of the language.
We present a method for the modular application of
discrete controller synthesis on synchronous programs,
and its integration in the BZR language. We consider
structured programs, as a composition of nodes, and
first apply DCS on particular nodes of the program, in
order to reduce the complexity of the controller
computation; then, we allow the abstraction of parts of
the program for this computation; and finally, we show
how to recompose the different controllers computed
from different abstractions for their correct
co-execution with the initial program. Our work is
illustrated with examples, and we present quantitative
results about its implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "adaptive and reconfigurable systems; components;
contracts; discrete controller synthesis; modularity;
reactive systems; synchronous programming",
}
@Article{Schlickling:2010:SAD,
author = "Marc Schlickling and Markus Pister",
title = "Semi-automatic derivation of timing models for {WCET}
analysis",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "67--76",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755888.1755899",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Embedded systems are widely used for supporting our
every day life. In the area of safety-critical systems
human life often depends on the system's correct
behavior. Many of such systems are hard real-time
systems, so that the notion of correctness means not
only functional correctness. They additionally have to
obey stringent timing constraints, i.e. timely task
completion under all circumstances is essential. An
example for such a safety-critical system is the flight
control computer in an airplane, which is responsible
for stability, attitude and path control.\par
In order to derive guarantees on the timing behavior of
hard real-time systems, the worst-case execution time
(WCET) of each task in the system has to be determined.
Saarland University and AbsInt GmbH have successfully
developed the aiT WCET analyzer for computing safe
upper bounds on the WCET of a task. The computation is
mainly based on abstract interpretation of timing
models of the processor and its periphery. Such timing
models are currently hand-crafted by human experts.
Therefore their implementation is a time-consuming and
error-prone process.\par
Modern processors or system controllers are
automatically synthesized out of formal hardware
specifications like VHDL or Verilog. Besides the
system's functional behavior, such specifications
provide all information needed for the creation of a
timing model. But due to their size and complexity,
manually examining the sources is even more complex
than only looking at the processor manuals. Moreover,
this would reduce neither the effort nor the probability of
implementation errors.\par
To face this problem, this paper proposes a method for
semi-automatically deriving suitable timing models out
of formal hardware specifications in VHDL that fit to
the tool chain of the aiT WCET analyzer. By this, we
reduce the creation time of timing models from months
to weeks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "hard real-time; vhdl; worst-case execution time",
}
@Article{Viskic:2010:DEA,
author = "Ines Viskic and Lochi Yu and Daniel Gajski",
title = "Design exploration and automatic generation of {MPSoC}
platform {TLMs} from {Kahn Process Network}
applications",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "77--84",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755888.1755900",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With increasingly more complex Multi-Processor Systems
on Chip (MPSoC) and shortening time-to-market
projections, Transaction Level Modeling and Platform
Aware Design are seen as promising approaches to
efficient MPSoC design.\par
In this paper, we present an automated 3-phase
process of Platform Aware Design and apply it to Kahn
Process Networks (KPN) applications, a widely used
model of computation for data-flow applications. We
start with the KPN application and an abstract platform
template and automatically generate an executable TLM
with estimated timing that accurately reflects the
system platform. We support homogeneous and
heterogeneous multi-master platform models with shared
memory or direct communication paradigm. The
communication in heterogeneous platform modules is
enabled with the transducer unit (TX) for protocol
translation. TX units also act as message routers to
support Network on Chip (NoC) communication.\par
We evaluate our approach with the case study of the
H.264 Encoder design process, in which the
specification-compliant design was reached from the KPN
application in less than 2 hours. The example
demonstrates that automatic generation of platform
aware TLMs enables a fast, efficient, and
error-resilient design process.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "automatic generation; Kahn Process Network; process
mapping; transaction level model",
}
@Article{Ozturk:2010:CDN,
author = "Ozcan Ozturk and Mahmut Kandemir and Mary J. Irwin and
Sri H. K. Narayanan",
title = "Compiler directed network-on-chip reliability
enhancement for chip multiprocessors",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "85--94",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755902",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Chip multiprocessors (CMPs) are expected to be the
building blocks for future computer systems. While
architecting these emerging CMPs is a challenging
problem on its own, programming them is even more
challenging. As the number of cores accommodated in
chip multiprocessors increases, network-on-chip (NoC)
type communication fabrics are expected to replace
traditional point-to-point buses. Most of the prior
software-related work targeting CMPs focuses on
performance and power aspects. However, as technology
scales, components of a CMP are being increasingly
exposed to both transient and permanent hardware
failures. This paper presents and evaluates a
compiler-directed power-performance aware reliability
enhancement scheme for network-on-chip (NoC) based chip
multiprocessors (CMPs). The proposed scheme improves
on-chip communication reliability by duplicating
messages traveling across CMP nodes such that, for each
original message, its duplicate uses a different set of
communication links as much as possible (to satisfy
performance constraint). In addition, our approach
tries to reuse communication links across the different
phases of the program to maximize link shutdown
opportunities for the NoC (to satisfy power
constraint). Our results show that the proposed
approach is very effective in improving on-chip network
reliability, without causing excessive power or
performance degradation. In our experiments, we also
evaluate the performance-oriented and energy-oriented
versions of our compiler-directed reliability
enhancement scheme, and compare it to two pure
hardware-based fault-tolerant routing schemes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "chip multiprocessors; compiler; noc; reliability",
}
@Article{Kulkarni:2010:IBP,
author = "Prasad A. Kulkarni and Michael R. Jantz and David B.
Whalley",
title = "Improving both the performance benefits and speed of
optimization phase sequence searches",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "95--104",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755888.1755903",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The issues of compiler optimization phase ordering and
selection present important challenges to compiler
developers in several domains, and in particular to the
speed, code size, power, and cost-constrained domain of
embedded systems. Different sequences of optimization
phases have been observed to provide the best
performance for different applications. Compiler
writers and embedded systems developers have recently
addressed this problem by conducting iterative
empirical searches using machine-learning based
heuristic algorithms in an attempt to find the phase
sequences that are most effective for each application.
Such searches are generally performed at the program
level, although a few studies have been performed at
the function level. The finer granularity of
function-level searches has the potential to provide
greater overall performance benefits, but only at the
cost of slower searches caused by a greater number of
performance evaluations that often require expensive
program simulations. In this paper, we evaluate the
performance benefits and search time increases of
function-level approaches as compared to their
program-level counterparts. We then present a novel
search algorithm that conducts distinct function-level
searches simultaneously, but requires only a single
program simulation for evaluating the performance of
potentially unique sequences for each function. Thus,
our new hybrid search strategy provides the enhanced
performance benefits of function-level searches with a
search-time cost that is comparable to or less than
program-level searches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "genetic algorithms; phase ordering",
}
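The key trick of the hybrid search described above is amortizing one whole-program evaluation across many concurrent per-function searches. A stand-in sketch of that amortization is below; the phases, the random search, and the fitness function are all toy placeholders, not the paper's algorithm.

# Sketch of the hybrid idea: run per-function sequence searches in
# lockstep, but pay for only one whole-program evaluation per round by
# compiling each function with its own current candidate sequence.
import random

PHASES = ["inline", "unroll", "gcse", "licm", "dce"]
FUNCS = ["main", "kernel", "init"]

def program_fitness(assignment):
    # Stand-in for one simulation of the whole program; returns
    # per-function feedback so every candidate is scored from one run.
    return {f: sum(PHASES.index(p) for p in seq) + random.random()
            for f, seq in assignment.items()}

best = {f: ([], float("inf")) for f in FUNCS}
for _ in range(20):                        # 20 rounds, one "simulation" each
    candidate = {f: random.sample(PHASES, k=3) for f in FUNCS}
    scores = program_fitness(candidate)    # single combined evaluation
    for f in FUNCS:                        # independent per-function accept
        if scores[f] < best[f][1]:
            best[f] = (candidate[f], scores[f])
print({f: seq for f, (seq, _) in best.items()})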
@Article{Li:2010:ECU,
author = "Weijia Li and Youtao Zhang",
title = "An efficient code update scheme for {DSP} applications
in mobile embedded systems",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "105--114",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755904",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "DSP processors usually provide dedicated address
generation units (AGUs) to assist address computation.
By carefully allocating variables in the memory, DSP
compilers take advantage of AGUs and generate efficient
code with compact size and improved performance.
However, DSP applications running on mobile embedded
systems often need to be updated after their initial
releases. Studies showed that small changes at the
source code level may significantly change the variable
layout in the memory and thus the binary code, which
causes large energy overheads to mobile embedded
systems that patch through wireless or satellite
communication, and often a pecuniary burden to the
users.\par
In this paper, we propose an update-conscious code
update scheme to effectively reduce patch size. It
first performs incremental offset assignment based on a
recent variable coalescing heuristic, and then
summarizes the code difference using two types of
update primitives. Our experimental results showed that
using update-conscious code update can greatly improve
code similarity and thus reduce the update script
sizes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "context-aware script; context-unaware script;
incremental coalescing general offset assignment
(icgoa); incremental coalescing simple offset
assignment (icsoa)",
}
@Article{Wernsing:2010:ECF,
author = "John Robert Wernsing and Greg Stitt",
title = "Elastic computing: a framework for transparent,
portable, and adaptive multi-core heterogeneous
computing",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "115--124",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755906",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the past decade, system architectures have
started on a clear trend towards increased parallelism
and heterogeneity, often resulting in speedups of 10x
to 100x. Despite numerous compiler and high-level
synthesis studies, usage of such systems has largely
been limited to device experts, due to significantly
increased application design complexity. To reduce
application design complexity, we introduce elastic
computing, a framework that separates functionality
from implementation details by enabling designers to
use specialized functions, called elastic functions,
which enable an optimization framework to explore
thousands of possible implementations, even ones using
different algorithms. Elastic functions allow designers
to execute the same application code efficiently on
potentially any architecture and for different runtime
parameters such as input size, battery life, etc. In
this paper, we present an initial elastic computing
framework that transparently optimizes application code
onto diverse systems, achieving significant speedups
ranging from 1.3x to 46x on a hyper-threaded Xeon
system with an FPGA accelerator, a 16-CPU Opteron
system, and a quad-core Xeon system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "elastic computing; fpga; heterogeneous architectures;
multi-core; speedup",
}
@Article{Biehl:2010:ISA,
author = "Matthias Biehl and Chen DeJiu and Martin
T{\"o}rngren",
title = "Integrating safety analysis into the model-based
development toolchain of automotive embedded systems",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "125--132",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755907",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The automotive industry has a growing demand for the
seamless integration of safety analysis tools into the
model-based development toolchain for embedded systems.
This requires translating concepts of the automotive
domain to the safety domain. We automate such a
translation between the automotive architecture
description language EAST-ADL2 and the safety analysis
tool HiP-HOPS by using model transformations and by
leveraging the advantages of different model
transformation techniques. Through this integration,
the analysis can be conducted early in the development
process, when the system can be redesigned to fulfill
safety goals with relatively low effort and cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "architecture description language; model-based
development; safety analysis; tool integration",
}
@Article{Fischmeister:2010:SBP,
author = "Sebastian Fischmeister and Yanmeng Ba",
title = "Sampling-based program execution monitoring",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "133--142",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755908",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For its high overall cost during product development,
program debugging is an important aspect of system
development. Debugging is a hard and complex activity,
especially in time-sensitive systems which have limited
resources and demanding timing constraints. System
tracing is a frequently used technique for debugging
embedded systems. A specific use of system tracing is
to monitor and debug control-flow problems in programs.
However, it is difficult to implement because of the
potentially high overhead it can introduce and the
changes in system behavior caused by tracing. To
address these problems,
in this work, we present a sampling-based approach to
execution monitoring which specifically helps
developers debug time-sensitive systems such as
real-time applications. We build the system model and
propose three theorems to determine the sampling period
in different scenarios. We also design seven heuristics
and an instrumentation framework to extend the sampling
period, which can reduce the monitoring overhead and
achieve an optimal tradeoff between accuracy and the
overhead introduced by instrumentation. Using this
monitoring framework, we can use the information
extracted through sampling to reconstruct the system
state and execution paths to locate the deviation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "debugging; embedded system; monitoring; sampling;
tracing",
}
@Article{Shrivastava:2010:CVE,
author = "Aviral Shrivastava and Jongeun Lee and Reiley
Jeyapaul",
title = "Cache vulnerability equations for protecting data in
embedded processor caches from soft errors",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "143--152",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755888.1755910",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Continuous technology scaling has brought us to a
point, where transistors have become extremely
susceptible to cosmic radiation strikes, or soft
errors. Inside the processor, caches are most
vulnerable to soft errors, and techniques at various
levels of design abstraction, e.g., fabrication, gate
design, circuit design, and microarchitecture-level,
have been developed to protect data in caches. However,
no work has been done to investigate the effect of code
transformations on the vulnerability of data in caches.
Data is vulnerable to soft errors in the cache only if
it will be read by the processor, and not if it will be
overwritten. Since code transformations can change the
read-write pattern of program variables, they
significantly affect the soft error vulnerability of
program variables in the cache. We observe that an
opportunity often exists to significantly reduce the
soft error vulnerability of cache data by trading off
a little performance. However, even if one wanted to
exploit this trade-off, it would be difficult, since
there are no efficient techniques to estimate the
vulnerability of data in caches. To this end, this
paper develops an efficient static analysis method to
estimate program vulnerability in caches, which
enables the compiler to exploit the
performance-vulnerability trade-offs in applications.
Finally, as compared to simulation-based estimation,
static analysis provides insights into the
vulnerability calculation that suggest simple schemes
to reduce program vulnerability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "cache vulnerability; code transformation; compiler
technique; embedded processors; soft errors; static
analysis",
}
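
The entry above rests on a simple observation: cached data is vulnerable to
a soft error only during intervals that end in a read, since an overwrite
masks any corruption. A minimal trace-based Python sketch of that accounting
follows; it is illustrative only (the paper's point is a static, trace-free
analysis), and the event format and the decision to ignore evictions are
assumptions.

    def cache_vulnerability(trace):
        # trace: list of (cycle, kind, block) with kind 'R' or 'W'.
        # The interval between consecutive accesses to a block counts
        # as vulnerable only when the later access is a read; an
        # interval that ends in an overwrite cannot propagate a soft
        # error. Evictions are ignored for simplicity.
        last_access = {}
        vulnerable_cycles = 0
        for cycle, kind, block in trace:
            if kind == 'R' and block in last_access:
                vulnerable_cycles += cycle - last_access[block]
            last_access[block] = cycle
        return vulnerable_cycles

    # The write at cycle 30 masks the 20..30 interval: 20 + 60 cycles.
    events = [(0, 'W', 'x'), (20, 'R', 'x'), (30, 'W', 'x'), (90, 'R', 'x')]
    assert cache_vulnerability(events) == 80
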
@Article{Altmeyer:2010:RAT,
author = "Sebastian Altmeyer and Claire Maiza and Jan Reineke",
title = "Resilience analysis: tightening the {CRPD} bound for
set-associative caches",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "153--162",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755888.1755911",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In preemptive real-time systems, scheduling analyses
need --- in addition to the worst-case execution time
the context-switch cost. In case of preemption, the
preempted and the preempting task may interfere on the
cache memory.\par
This interference leads to additional cache misses in
the preempted task. The delay due to these cache misses
is referred to as the cache-related preemption
delay (CRPD), which constitutes the major part of the
context-switch cost.\par
In this paper, we present a new approach to compute
tight bounds on the CRPD for LRU set-associative
caches, based on analyses of both the preempted and the
preempting task. Previous approaches analyzing both the
preempted and the preempting task were either imprecise
or unsound.\par
As the basis of our approach we introduce the notion of
resilience: The resilience of a memory block of the
preempted task is the maximal number of memory accesses
a preempting task could perform without causing an
additional miss to this block. By computing lower
bounds on the resilience of blocks and an upper bound
on the number of accesses by a preempting task, one can
guarantee that some blocks cannot contribute to the
CRPD. The CRPD analysis based on resilience
considerably outperforms previous approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "cache-related preemption delay; lru caches; timing
analysis",
}
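
The resilience argument reduces to a per-block comparison once the two
bounds are available. A minimal sketch, assuming a fixed miss penalty and
bounds already produced by some analysis (all names are illustrative):

    def resilience_crpd_bound(miss_penalty, resilience_lb, preempt_ub):
        # resilience_lb: block -> lower bound on the number of accesses
        # a preempting task can perform without evicting the block.
        # preempt_ub: upper bound on the preempting task's cache
        # accesses. Blocks whose resilience is at least preempt_ub
        # provably survive preemption and are excluded from the bound.
        evictable = [b for b, r in resilience_lb.items() if r < preempt_ub]
        return miss_penalty * len(evictable)

    # Only block 'a' can suffer an additional reload here.
    assert resilience_crpd_bound(100, {'a': 2, 'b': 8}, preempt_ub=5) == 100
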
@Article{Wang:2010:RRA,
author = "Yi Wang and Duo Liu and Meng Wang and Zhiwei Qin and
Zili Shao and Yong Guan",
title = "{RNFTL}: a reuse-aware {NAND} flash translation layer
for flash memory",
journal = j-SIGPLAN,
volume = "45",
number = "4",
pages = "163--172",
month = apr,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1755951.1755912",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Apr 15 12:45:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we propose a hybrid-level flash
translation layer (FTL) called RNFTL (Reuse-Aware NFTL)
to improve the endurance and space utilization of NAND
flash memory. Our basic idea is to prevent a primary
block with many free pages from being erased in a merge
operation. The preserved primary blocks are further
reused as replacement blocks. In this way, both the
space utilization and the endurance of each block in
NAND flash can be improved. To the best of our
knowledge, this is the first work to employ a
reuse-aware strategy in FTL for improving the space
utilization and endurance of NAND flash. We conduct
experiments on a set of traces collected from
real-life workloads. The experimental results show
that our technique significantly improves space
utilization, block lifetime, and wear-leveling
compared with previous work.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "endurance; flash memory; reuse; space utilization;
wear-leveling",
}
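
The reuse rule itself is small: at the end of a merge, a primary block that
still holds many free pages is moved to the replacement-block pool instead
of being erased. The sketch below is schematic, not the paper's data
structures; the Block class, the pools, and the threshold are assumptions
made for illustration.

    PAGES_PER_BLOCK = 64

    class Block:
        def __init__(self, free_pages):
            self.free_pages = free_pages
            self.erase_count = 0

        def erase(self):
            self.free_pages = PAGES_PER_BLOCK
            self.erase_count += 1

    def finish_merge(primary, replacement_pool, free_pool, threshold=16):
        # Reuse-aware step: preserving a half-empty primary block both
        # recovers its free pages (space utilization) and avoids an
        # erase cycle (endurance).
        if primary.free_pages >= threshold:
            replacement_pool.append(primary)
        else:
            primary.erase()
            free_pool.append(primary)
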
@Article{Agerwala:2010:ECC,
author = "Tilak Agerwala",
title = "Exascale computing: the challenges and opportunities
in the next decade",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "1--2",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693454",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Supercomputing systems have made great strides in
recent years as the extensive computing needs of
cutting-edge engineering work and scientific discovery
have driven the development of more powerful systems.
In 2008, the first petaflop machine was released, and
historic trends indicate that in ten years, we should
be at the exascale level. Indeed, various agencies are
targeting a computer system capable of 1 Exaop ($10^{18}$
ops) of computation within the next decade. We believe
that applications in many industries will be materially
transformed by exascale computers.\par
Meeting the exascale challenge will require significant
innovation in technology, architecture and
programmability. Power is a fundamental problem at all
levels; traditional memory cost and performance are not
keeping pace with compute potential; the storage
hierarchy will have to be re-architected; networks will
be a much bigger part of the system cost; reliability
at exascale levels will require a holistic approach to
architecture design; and programmability and
ease-of-use will be essential to extracting
the promised performance at the exascale level.\par
In this talk, I will discuss the importance of exascale
computing and address the major challenges, touching on
the areas of technology, architecture, reliability and
usability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "architecture; challenges; exascale",
}
@Article{Mendez-Lojo:2010:SDO,
author = "Mario M{\'e}ndez-Lojo and Donald Nguyen and Dimitrios
Prountzos and Xin Sui and M. Amber Hassaan and Milind
Kulkarni and Martin Burtscher and Keshav Pingali",
title = "Structure-driven optimizations for amorphous
data-parallel programs",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "3--14",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693457",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Irregular algorithms are organized around
pointer-based data structures such as graphs and trees,
and they are ubiquitous in applications. Recent work by
the Galois project has provided a systematic approach
for parallelizing irregular applications based on the
idea of optimistic or speculative execution of
programs. However, the overhead of optimistic parallel
execution can be substantial. In this paper, we show
that many irregular algorithms have structure that can
be exploited and present three key optimizations that
take advantage of algorithmic structure to reduce
speculative overheads. We describe the implementation
of these optimizations in the Galois system and present
experimental results to demonstrate their benefits. To
the best of our knowledge, this is the first system to
exploit algorithmic structure to optimize the execution
of irregular programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "amorphous data-parallelism; cautious operator
implementations; irregular programs; iteration
coalescing; one-shot optimization; optimistic
parallelization; synchronization overheads",
}
@Article{Coons:2010:GEU,
author = "Katherine E. Coons and Sebastian Burckhardt and
Madanlal Musuvathi",
title = "{GAMBIT}: effective unit testing for concurrency
libraries",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "15--24",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693458",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As concurrent programming becomes prevalent, software
providers are investing in concurrency libraries to
improve programmer productivity. Concurrency libraries
improve productivity by hiding error-prone, low-level
synchronization from programmers and providing
higher-level concurrent abstractions. Testing such
libraries is difficult, however, because concurrency
failures often manifest only under particular
scheduling circumstances. Current best testing
practices are often inadequate: heuristic-guided
fuzzing is not systematic, systematic schedule
enumeration does not find bugs quickly, and stress
testing is neither systematic nor fast.\par
To address these shortcomings, we propose a prioritized
search technique called GAMBIT that combines the speed
benefits of heuristic-guided fuzzing with the
soundness, progress, and reproducibility guarantees of
stateless model checking. GAMBIT combines known
techniques such as partial-order reduction and
preemption-bounding with a generalized best-first
search framework that prioritizes schedules likely to
expose bugs. We evaluate GAMBIT's effectiveness on
newly released concurrency libraries for Microsoft's
{.NET} framework. Our experiments show that GAMBIT
finds bugs more quickly than prior stateless model
checking techniques without compromising coverage
guarantees or reproducibility.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "concurrency; model checking; multithreading;
partial-order reduction; preemption bound; software
testing",
}
@Article{Lee:2010:FXC,
author = "Jonathan K. Lee and Jens Palsberg",
title = "Featherweight {X10}: a core calculus for async-finish
parallelism",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "25--36",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693459",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a core calculus with two of X10's key
constructs for parallelism, namely async and finish.
Our calculus forms a convenient basis for type systems
and static analyses for languages with async-finish
parallelism, and for tractable proofs of correctness.
For example, we give a short proof of the
deadlock-freedom theorem of Saraswat and Jagadeesan.
Our main contribution is a type system that solves the
open problem of context-sensitive
may-happen-in-parallel analysis for languages with
async-finish parallelism. We prove the correctness of
our type system and we report experimental results of
performing type inference on 13,000 lines of X10 code.
Our analysis runs in polynomial time, takes a total of
28 seconds on our benchmarks, and produces a low number
of false positives, which suggests that our analysis is
a good basis for other analyses such as race
detectors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "operational semantics; parallelism; static analysis",
}
@Article{Mannarswamy:2010:CAS,
author = "Sandya Mannarswamy and Dhruva R. Chakrabarti and
Kaushik Rajan and Sujoy Saraswati",
title = "Compiler aided selective lock assignment for improving
the performance of software transactional memory",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "37--46",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693460",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Atomic sections have been recently introduced as a
language construct to improve the programmability of
concurrent software. They simplify programming by not
requiring the explicit specification of locks for
shared data. Atomic sections are typically supported
in software either through optimistic concurrency,
using transactional memory, or through pessimistic
concurrency, using compiler-assigned locks. As a
software transactional
memory (STM) system does not take advantage of the
specific memory access patterns of an application it
often suffers from false conflicts and high validation
overheads. On the other hand, the compiler usually ends
up assigning coarse grain locks as it relies on whole
program points-to analysis which is conservative by
nature. This adversely affects performance by limiting
concurrency. In order to mitigate the disadvantages
associated with STM's lock assignment scheme, we
propose a hybrid approach which combines STM's lock
assignment with a compiler-aided selective lock
assignment scheme (referred to as SCLA-STM). SCLA-STM
overcomes the inefficiencies associated with a purely
compile-time lock assignment approach by (i) using the
underlying STM for shared variables where only a
conservative analysis is possible by the compiler
(e.g., in the presence of may-alias points to
information) and (ii) being selective about the shared
data chosen for the compiler-aided lock assignment. We
describe our prototype SCLA-STM scheme implemented in
the HP-UX IA-64 C/C++ compiler, using TL2 as our STM
implementation. We show that SCLA-STM improves
application performance for certain STAMP benchmarks
from 1.68\% to 37.13\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compilers; multithreading; parallelization;
performance",
}
@Article{Rossbach:2010:TPA,
author = "Christopher J. Rossbach and Owen S. Hofmann and Emmett
Witchel",
title = "Is transactional programming actually easier?",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "47--56",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693462",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Chip multi-processors (CMPs) have become ubiquitous,
while tools that ease concurrent programming have not.
The promise of increased performance for all
applications through ever more parallel hardware
requires good tools for concurrent programming,
especially for average programmers. Transactional
memory (TM) has enjoyed recent interest as a tool that
can help programmers program concurrently.\par
The transactional memory (TM) research community is
heavily invested in the claim that programming with
transactional memory is easier than alternatives (like
locks), but evidence for or against the veracity of
this claim is scant. In this paper, we describe a
user-study in which 237 undergraduate students in an
operating systems course implement the same programs
using coarse and fine-grain locks, monitors, and
transactions. We surveyed the students after the
assignment, and examined their code to determine the
types and frequency of programming errors for each
synchronization technique. Inexperienced programmers
found baroque syntax a barrier to entry for
transactional programming. On average, subjective
evaluation showed that students found transactions
harder to use than coarse-grain locks, but slightly
easier to use than fine-grained locks. Detailed
examination of synchronization errors in the students'
code tells a rather different story. Overwhelmingly,
the number and types of programming errors the students
made were much lower for transactions than for locks. On
a similar programming problem, over 70\% of students
made errors with fine-grained locking, while less than
10\% made errors with transactions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "optimistic concurrency; synchronization; transactional
memory",
}
@Article{Zyulkyarov:2010:DPU,
author = "Ferad Zyulkyarov and Tim Harris and Osman S. Unsal and
Adr{\'\i}an Cristal and Mateo Valero",
title = "Debugging programs that use atomic blocks and
transactional memory",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "57--66",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693463",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the emergence of research prototypes, programming
using atomic blocks and transactional memory (TM) is
becoming more attractive. This paper describes our
experience building and using a debugger for programs
written with these abstractions. We introduce three
approaches: ({\em i\/}) debugging at the level of
atomic blocks, where the programmer is shielded from
implementation details (such as exactly what kind of TM
is used, or indeed whether lock inference is used
instead), ({\em ii\/}) debugging at the level of
transactions, where conflict rates, read sets, write
sets, and other TM internals are visible, and ({\em
iii\/}) debug-time transactions, which let the
programmer manipulate synchronization from within the
debugger - e.g., enlarging the scope of an atomic block
to try to identify a bug.\par
In this paper we explain the rationale behind the new
debugging approaches that we propose. We describe the
design and implementation of an extension to the WinDbg
debugger, enabling support for C\# programs using
atomic blocks and TM. We also demonstrate the design of
a 'conflict point discovery' technique for identifying
program statements that introduce contention between
transactions. We illustrate how these techniques can be
used by optimizing a C\# version of the Genome
application from the STAMP TM benchmark suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "debugging; transactional memory",
}
@Article{Dalessandro:2010:NSS,
author = "Luke Dalessandro and Michael F. Spear and Michael L.
Scott",
title = "{NOrec}: streamlining {STM} by abolishing ownership
records",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "67--78",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693464",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Drawing inspiration from several previous projects, we
present an ownership-record-free software transactional
memory (STM) system that combines extremely low
overhead with unusually clean semantics. While unlikely
to scale to hundreds of active threads, this 'NOrec'
system offers many appealing features: very low
fast-path latency--as low as any system we know of that
admits concurrent updates; publication and
privatization safety; livelock freedom; a small,
constant amount of global metadata, and full
compatibility with existing data structure layouts; no
false conflicts due to hash collisions; compatibility
with both managed and unmanaged languages, and both
static and dynamic compilation; and easy accommodation
of closed nesting, inevitable (irrevocable)
transactions, and starvation avoidance mechanisms. To
the best of our knowledge, no extant STM system
combines this set of features.\par
While transactional memory for processors with hundreds
of cores is likely to require hardware support,
software implementations will be required for backward
compatibility with current and near-future processors
with 2--64 cores, as well as for fall-back in future
machines when hardware resources are exhausted. Our
experience suggests that NOrec may be an ideal
candidate for such a software system. We also observe
that it has considerable appeal for use within the
operating system, and in systems that require both
closed nesting and publication safety.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "ownership records; software transactional memory;
transactional memory; transactional memory models",
}
@Article{Maldonado:2010:SST,
author = "Walther Maldonado and Patrick Marlier and Pascal
Felber and Adi Suissa and Danny Hendler and Alexandra
Fedorova and Julia L. Lawall and Gilles Muller",
title = "Scheduling support for transactional memory contention
management",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "79--90",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693465",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactional Memory (TM) is considered as one of the
most promising paradigms for developing concurrent
applications. TM has been shown to scale well on
>multiple cores when the data access pattern behaves
'well,' i.e., when few conflicts are induced. In
contrast, data patterns with frequent write sharing,
with long transactions, or when many threads contend
for a smaller number of cores, result in numerous
conflicts. Until recently, TM implementations had
little control of transactional threads, which remained
under the supervision of the kernel's
transaction-ignorant scheduler. Conflicts are thus
traditionally resolved by consulting an STM-level {\em
contention manager}. Consequently, the contention
managers of these 'conventional' TM implementations
suffer from a lack of precision and often fail to
ensure reasonable performance in high-contention
workloads.\par
Recently, scheduling-based TM contention-management has
been proposed for increasing TM efficiency under
high contention [2, 5, 19]. However, only user-level
schedulers have been considered. In this work, we
propose, implement and evaluate several novel
kernel-level scheduling support mechanisms for TM
contention management. We also investigate different
strategies for efficient communication between the
kernel and the user-level TM library. To the best of
our knowledge, our work is the first to investigate
kernel-level support for TM contention
management.\par
We have introduced kernel-level TM scheduling support
into both the Linux and Solaris kernels. Our
experimental evaluation demonstrates that lightweight
kernel-level scheduling support significantly reduces
the number of aborts while improving transaction
throughput on various workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "contention management; scheduling; transactional
memory",
}
@Article{Barreto:2010:LPN,
author = "Jo{\~a}o Barreto and Aleksandar Dragojevi{\'c} and
Paulo Ferreira and Rachid Guerraoui and Michal Kapalka",
title = "Leveraging parallel nesting in transactional memory",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "91--100",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693466",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Exploiting the emerging reality of affordable
multi-core architectures goes through providing
programmers with simple abstractions that would enable
them to easily turn their sequential programs into
concurrent ones that expose as much parallelism as
possible. While transactional memory promises to make
concurrent programming easy for a wide programmer
community, current implementations either disallow
nested transactions to run in parallel or do not scale
to arbitrary parallel nesting depths. This is an
important obstacle to the central goal of transactional
memory, as programmers can only start parallel threads
in restricted parts of their code.\par
This paper addresses the intrinsic difficulty behind
the support for parallel nesting in transactional
memory, and proposes a novel solution that, to the best
of our knowledge, is the first practical solution to
meet the lowest theoretical upper bound known for the
problem.\par
Using a synthetic workload configured to test parallel
transactions on a multi-core machine, a practical
implementation of our algorithm yields substantial
speed-ups (up to 22x with 33 threads) relative to
serial nesting, and shows that the time to start and
commit transactions, as well as to detect conflicts, is
independent of nesting depth.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "fork-join; nested parallel programs; transactional
memory; work-stealing",
}
@Article{Torrellas:2010:ESC,
author = "Josep Torrellas and Bill Gropp and Jaime Moreno and
Kunle Olukotun and Vivek Sarkar",
title = "Extreme scale computing: challenges and
opportunities",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "101--102",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693468",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "architecture; challenges; exascale",
}
@Article{Arvind:2010:HI,
author = "Arvind",
title = "Is hardware innovation over?",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "103--104",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693455",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "My colleagues, promotion committees, research funding
agencies and business people often wonder if there is
a need for any architecture research. There seems to be
no room to dislodge Intel IA-32. Even the number of new
Application-Specific Integrated Circuits (ASICs) seems
to be declining each year, because of the
ever-increasing development cost.\par
This viewpoint ignores another reality which is that
the future will be dominated by mobile devices such as
smart phones and the infrastructure needed to support
consumer services on these devices. This is already
restructuring the IT industry. To first order, in
the mobile world functionality is determined by what
can be supported within a 3W power budget. The only way
to reduce power by one to two orders of magnitude is
via functionally specialized hardware blocks. A
fundamental shift is needed in the current design flow
of systems-on-a-chip (SoCs) to produce them in a
less-risky and cost-effective manner.\par
In this talk we will present, via examples, a method of
designing systems that facilitates the synthesis of
complex SoCs from reusable 'IP' modules. The technical
challenge is to provide a method for connecting modules
in a parallel setting so that the functionality and the
performance of the composite are predictable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "hardware innovation; system-on-chip",
}
@Article{Baghsorkhi:2010:APM,
author = "Sara S. Baghsorkhi and Matthieu Delahaye and Sanjay J.
Patel and William D. Gropp and Wen-mei W. Hwu",
title = "An adaptive performance modeling tool for {GPU}
architectures",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "105--114",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693470",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents an analytical model to predict the
performance of\par
general-purpose applications on a GPU architecture. The
model is designed to provide performance information to
an auto-tuning compiler and assist it in narrowing down
the search to the more promising implementations. It
can also be incorporated into a tool to help
programmers better assess the performance bottlenecks
in their code. We analyze each GPU kernel and identify
how the kernel exercises major GPU microarchitecture
features. To identify the performance bottlenecks
accurately, we introduce an abstract interpretation of
a GPU kernel, {\em work flow graph}, based on which we
estimate the execution time of a GPU kernel. We
validated our performance model on the NVIDIA GPUs
using CUDA (Compute Unified Device Architecture). For
this purpose, we used data parallel benchmarks that
stress different GPU microarchitecture events such as
uncoalesced memory accesses, scratch-pad memory bank
conflicts, and control flow divergence, which must be
accurately modeled but represent challenges to the
analytical performance models. The proposed model
captures full system complexity and shows high accuracy
in predicting the performance trends of different
optimized kernel implementations. We also describe our
approach to extracting the performance model
automatically from a kernel code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "analytical model; GPU; parallel programming;
performance estimation",
}
@Article{Choi:2010:MDA,
author = "Jee W. Choi and Amik Singh and Richard W. Vuduc",
title = "Model-driven autotuning of sparse matrix-vector
multiply on {GPUs}",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "115--126",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693471",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a performance model-driven framework for
automated performance tuning (autotuning) of sparse
matrix-vector multiply (SpMV) on systems accelerated by
graphics processing units (GPU). Our study consists of
two parts.\par
First, we describe several carefully hand-tuned SpMV
implementations for GPUs, identifying key GPU-specific
performance limitations, enhancements, and tuning
opportunities. These implementations, which include
variants on classical blocked compressed sparse row
(BCSR) and blocked ELLPACK (BELLPACK) storage formats,
match or exceed state-of-the-art implementations. For
instance, our best BELLPACK implementation achieves up
to 29.0 Gflop/s in single-precision and 15.7 Gflop/s in
double-precision on the NVIDIA T10P multiprocessor
(C1060), enhancing prior state-of-the-art unblocked
implementations (Bell and Garland, 2009) by up to
1.8x and 1.5x for single- and double-precision,
respectively.\par
However, achieving this level of performance requires
input matrix-dependent parameter tuning. Thus, in the
second part of this study, we develop a performance
model that can guide tuning. Like prior autotuning
models for CPUs (e.g., Im, Yelick, and Vuduc, 2004),
this model requires offline measurements and run-time
estimation, but more directly models the structure of
multithreaded vector processors like GPUs. We show that
our model can identify the implementations that achieve
within 15\% of those found through exhaustive search.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "GPU; performance modeling; sparse matrix-vector
multiplication",
}
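
For reference, the blocked compressed sparse row (BCSR) kernel that the
study tunes can be stated in a few lines of sequential NumPy; the block
dimensions r and c are exactly the matrix-dependent parameters the model
must choose. This sketch omits the GPU thread mapping that the paper is
actually about.

    import numpy as np

    def bcsr_spmv(row_ptr, col_idx, blocks, x, r, c):
        # blocks: array of shape (nnz_blocks, r, c); block row bi covers
        # matrix rows bi*r .. bi*r + r - 1, and col_idx[k] is the block
        # column of the k-th stored block.
        n_brows = len(row_ptr) - 1
        y = np.zeros(n_brows * r)
        for bi in range(n_brows):
            for k in range(row_ptr[bi], row_ptr[bi + 1]):
                bj = col_idx[k]
                y[bi * r:(bi + 1) * r] += blocks[k].dot(x[bj * c:(bj + 1) * c])
        return y
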
@Article{Zhang:2010:FTS,
author = "Yao Zhang and Jonathan Cohen and John D. Owens",
title = "Fast tridiagonal solvers on the {GPU}",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "127--136",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693472",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study the performance of three parallel algorithms
and their hybrid variants for solving tridiagonal
linear systems on a GPU: cyclic reduction (CR),
parallel cyclic reduction (PCR) and recursive doubling
(RD). We develop an approach to measure, analyze, and
optimize the performance of GPU programs in terms of
memory access, computation, and control overhead. We
find that CR enjoys linear algorithm complexity but
suffers from more algorithmic steps and bank conflicts,
while PCR and RD have fewer algorithmic steps but do
more work each step. To combine the benefits of the
basic algorithms, we propose hybrid CR+PCR and CR+RD
algorithms, which improve the performance of PCR, RD
and CR by 21\%, 31\% and 61\% respectively. Our GPU
solvers achieve up to a 28x speedup over a sequential
LAPACK solver, and a 12x speedup over a multi-threaded
CPU solver.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "GPGPU; performance optimization; tridiagonal linear
system",
}
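
Of the three algorithms, parallel cyclic reduction (PCR) is the most compact
to write down: each sweep combines every equation with its neighbors at the
current stride, and after about log2(n) sweeps the system is diagonal. A
sequential NumPy sketch of the recurrence (the paper's contribution is the
GPU mapping and the hybrid CR+PCR/CR+RD variants, not the recurrence
itself):

    import numpy as np

    def pcr_solve(a, b, c, d):
        # Tridiagonal solve by parallel cyclic reduction.
        # a: sub-diagonal (a[0] unused), b: diagonal, c: super-diagonal
        # (c[-1] unused), d: right-hand side; all of length n.
        a, b, c, d = (np.asarray(v, dtype=float).copy() for v in (a, b, c, d))
        n, stride = len(b), 1
        while stride < n:
            na, nb, nc, nd = a.copy(), b.copy(), c.copy(), d.copy()
            for i in range(n):
                lo, hi = i - stride, i + stride
                alpha = a[i] / b[lo] if lo >= 0 else 0.0
                gamma = c[i] / b[hi] if hi < n else 0.0
                nb[i] = b[i] - (alpha * c[lo] if lo >= 0 else 0.0) \
                             - (gamma * a[hi] if hi < n else 0.0)
                nd[i] = d[i] - (alpha * d[lo] if lo >= 0 else 0.0) \
                             - (gamma * d[hi] if hi < n else 0.0)
                na[i] = -alpha * a[lo] if lo >= 0 else 0.0
                nc[i] = -gamma * c[hi] if hi < n else 0.0
            a, b, c, d = na, nb, nc, nd
            stride *= 2
        return d / b  # every equation is now b[i] * x[i] = d[i]

    rng = np.random.default_rng(0)
    n = 8
    b = 4.0 + rng.random(n)
    a = rng.random(n)
    c = rng.random(n)
    a[0] = c[-1] = 0.0
    d = rng.random(n)
    A = np.diag(b) + np.diag(a[1:], -1) + np.diag(c[:-1], 1)
    assert np.allclose(pcr_solve(a, b, c, d), np.linalg.solve(A, d))
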
@Article{Sandes:2010:CUG,
author = "Edans Flavius O. Sandes and Alba Cristina M. A. de
Melo",
title = "{CUDAlign}: using {GPU} to accelerate the comparison
of megabase genomic sequences",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "137--146",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693473",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Biological sequence comparison is a very important
operation in Bioinformatics. Even though exact methods
to compare biological sequences exist, these
methods are often neglected due to their quadratic time
and space complexity. In order to accelerate these
methods, many GPU algorithms were proposed in the
literature. Nevertheless, all of them restrict the size
of the smallest sequence in such a way that Megabase
genome comparison is prevented. In this paper, we
propose and evaluate CUDAlign, a GPU algorithm that is
able to compare Megabase biological sequences with an
exact Smith--Waterman affine gap variant. CUDAlign was
implemented in CUDA and tested in two GPU boards,
separately. For real sequences whose sizes range from
1MBP (Megabase Pairs) to 47MBP, a close to uniform
GCUPS (Giga Cells Updates per Second) was obtained,
showing the potential scalability of our approach.
Also, CUDAlign was able to compare the human chromosome
21 and the chimpanzee chromosome 22. This operation
took 21 hours on GeForce GTX 280, resulting in a peak
performance of 20.375 GCUPS. As far as we know, this is
the first time such huge chromosomes are compared with
an exact method.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "biological sequence comparison; GPU; Smith--Waterman",
}
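
The exact variant in question is Smith--Waterman with affine gap penalties,
i.e. Gotoh's three-matrix recurrence. A quadratic-space sequential Python
sketch of the scoring pass is below; CUDAlign's contribution is evaluating
this recurrence wavefront-parallel in linear space on the GPU, which the
sketch does not attempt, and the scoring parameters are illustrative.

    def sw_affine_score(s1, s2, match=1, mismatch=-3, gap_open=5, gap_ext=2):
        # H: best local alignment score ending at (i, j);
        # E/F: best score ending in a gap in s1/s2 respectively.
        NEG = float('-inf')
        m, n = len(s1), len(s2)
        H = [[0.0] * (n + 1) for _ in range(m + 1)]
        E = [[NEG] * (n + 1) for _ in range(m + 1)]
        F = [[NEG] * (n + 1) for _ in range(m + 1)]
        best = 0.0
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                E[i][j] = max(H[i][j - 1] - gap_open, E[i][j - 1] - gap_ext)
                F[i][j] = max(H[i - 1][j] - gap_open, F[i - 1][j] - gap_ext)
                s = match if s1[i - 1] == s2[j - 1] else mismatch
                H[i][j] = max(0.0, H[i - 1][j - 1] + s, E[i][j], F[i][j])
                best = max(best, H[i][j])
        return best

    assert sw_affine_score("ACGT", "ACGT") == 4.0
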
@Article{Hofmeyr:2010:LBS,
author = "Steven Hofmeyr and Costin Iancu and Filip
Blagojevi{\'c}",
title = "Load balancing on speed",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "147--158",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693475",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To fully exploit multicore processors, applications
are expected to provide a large degree of thread-level
parallelism. While adequate for low core counts and
their typical workloads, the current load balancing
support in operating systems may not be able to achieve
efficient hardware utilization for parallel workloads.
Balancing run queue length globally ignores the needs
of parallel applications where threads are required to
make equal progress. In this paper we present a load
balancing technique designed specifically for parallel
applications running on multicore systems. Instead of
balancing run queue length, our algorithm balances the
time a thread has executed on ``faster'' and ``slower''
cores. We provide a user level implementation of speed
balancing on UMA and NUMA multi-socket architectures
running Linux and discuss behavior across a variety of
workloads, usage scenarios and programming models. Our
results indicate that speed balancing, when compared to
the native Linux load balancing, improves performance
and provides good performance isolation in all cases
considered. Speed balancing is also able to provide
comparable or better performance than DWRR, a fair
multi-processor scheduling implementation inside the
Linux kernel. Furthermore, parallel application
performance is often determined by the implementation
of synchronization operations and speed balancing
alleviates the need for tuning the implementations of
such primitives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "load balancing; operating systems; parallel
applications",
}
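
The balancing criterion itself, equalizing each thread's accumulated time on
``faster'' cores rather than run-queue lengths, can be sketched in a few
lines. Everything below (the record, the migration trigger, the round
structure) is an illustrative assumption; the actual implementation runs at
user level on Linux.

    class ThreadStat:
        def __init__(self, name, core, fast_time=0.0):
            self.name = name
            self.core = core            # core currently assigned
            self.fast_time = fast_time  # accumulated time on fast cores

    def speed_balance_round(threads, slack):
        # Swap the cores of the most- and least-advantaged threads when
        # their accumulated fast-core time diverges by more than slack,
        # so all threads of a parallel job make roughly equal progress.
        threads.sort(key=lambda t: t.fast_time)
        behind, ahead = threads[0], threads[-1]
        if ahead.fast_time - behind.fast_time > slack:
            behind.core, ahead.core = ahead.core, behind.core
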
@Article{Hoefler:2010:SCP,
author = "Torsten Hoefler and Christian Siebert and Andrew
Lumsdaine",
title = "Scalable communication protocols for dynamic sparse
data exchange",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "159--168",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693476",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many large-scale parallel programs follow a bulk
synchronous parallel (BSP) structure with distinct
computation and communication phases. Although the
communication phase in such programs may involve all
(or large numbers) of the participating processes, the
actual communication operations are usually sparse in
nature. As a result, communication phases are typically
expressed explicitly using point-to-point communication
operations or collective operations. We define the
dynamic sparse data-exchange (DSDE) problem and derive
bounds in the well-known LogGP model. While current
approaches work well with static applications, they run
into limitations as modern applications grow in scale,
and as the problems that are being solved become
increasingly irregular and dynamic.\par
To enable the compact and efficient expression of the
communication phase, we develop suitable sparse
communication protocols for irregular applications at
large scale. We discuss different irregular
applications and show the sparsity in the communication
for real-world input data. We discuss the time and
memory complexity of commonly used protocols for the
DSDE problem and develop {\em NBX\/} --a novel fast
algorithm with constant memory overhead for solving it.
Algorithm {\em NBX\/} improves the runtime of a sparse
data-exchange among 8,192 processors on BlueGene/P by a
factor of 5.6. In an application study, we show
improvements of up to a factor of 28.9 for a parallel
breadth first search on 8,192 BlueGene/P processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "alltoall; distributed termination; irregular
algorithms; nonblocking collective operations; sparse
data exchange",
}
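
The constant-memory-overhead idea behind {\em NBX\/} is that synchronous
sends complete only once matched, so a process may enter a nonblocking
barrier as soon as its own sends have matched, and the exchange is globally
complete when the barrier is. A hedged mpi4py sketch of that protocol,
assuming an MPI-3 library with Ibarrier (the paper states the algorithm in
MPI terms, not this Python form):

    from mpi4py import MPI

    def sparse_data_exchange(comm, outgoing):
        # outgoing: dict dest_rank -> picklable payload; the set of
        # senders targeting this rank is unknown in advance (the DSDE
        # problem).
        reqs = [comm.issend(msg, dest=dst, tag=7)
                for dst, msg in outgoing.items()]
        received, barrier, status = {}, None, MPI.Status()
        while True:
            # Drain whatever has arrived so far.
            while comm.iprobe(source=MPI.ANY_SOURCE, tag=7, status=status):
                src = status.Get_source()
                received[src] = comm.recv(source=src, tag=7)
            if barrier is None:
                # All local synchronous sends matched: join the barrier.
                if MPI.Request.Testall(reqs):
                    barrier = comm.Ibarrier()
            elif barrier.Test():
                return received  # everyone's sends have been matched

Run under mpiexec, each rank passes only its own outgoing map and learns the
identity of its senders from the returned dictionary.
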
@Article{Romein:2010:LCI,
author = "John W. Romein and P. Chris Broekema and Jan David Mol
and Rob V. van Nieuwpoort",
title = "The {LOFAR} correlator: implementation and performance
analysis",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "169--178",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693477",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "LOFAR is the first of a new generation of radio
telescopes. Rather than using expensive dishes, it
forms a distributed sensor network that combines the
signals from many thousands of simple antennas. Its
revolutionary design allows observations in a frequency
range that has hardly been studied before.\par
Another novel feature of LOFAR is the elaborate use of
{\em software\/} to process data, where traditional
telescopes use customized hardware. This dramatically
increases flexibility and substantially reduces costs,
but the high processing and bandwidth requirements
compel the use of a supercomputer. The antenna signals
are centrally combined, filtered, optionally
beam-formed, and correlated by an IBM Blue
Gene/P.\par
This paper describes the implementation of the
so-called correlator. To meet the real-time
requirements, the application is highly optimized, and
reaches exceptionally high computational and I/O
efficiencies. Additionally, we study the scalability of
the system, and show that it scales well beyond the
requirements. The optimizations allow us to use only
half the planned amount of resources, {\em and\/}
process 50\% more telescope data, significantly
improving the effectiveness of the entire telescope.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "correlator; IBM Blue Gene/P; LOFAR",
}
@Article{Tzannes:2010:LBS,
author = "Alexandros Tzannes and George C. Caragea and Rajeev
Barua and Uzi Vishkin",
title = "Lazy binary-splitting: a run-time adaptive
work-stealing scheduler",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "179--190",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693479",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Lazy Binary Splitting (LBS), a user-level
scheduler of nested parallelism for shared-memory
multiprocessors that builds on existing Eager Binary
Splitting work-stealing (EBS) implemented in Intel's
Threading Building Blocks (TBB), but improves
performance and ease-of-programming. In its simplest
form (SP), EBS requires manual tuning by repeatedly
running the application under carefully controlled
conditions to determine a {\em stop-splitting-threshold
(sst)\/} for every do-all loop in the code. This
threshold limits the parallelism and prevents excessive
overheads for fine-grain parallelism. Besides being
tedious, this tuning also over-fits the code to some
particular dataset, platform and calling context of the
do-all loop, resulting in poor performance portability
for the code. LBS overcomes both the performance
portability and ease-of-programming pitfalls of a
manually fixed threshold by adapting dynamically to
run-time conditions without requiring tuning.\par
We compare LBS to Auto-Partitioner (AP), the latest
default scheduler of TBB, which does not require manual
tuning either but lacks context portability, and
outperform it by 38.9\% using TBB's default AP
configuration, and by 16.2\% after we tuned AP to our
experimental platform. We also compare LBS to SP by
manually finding SP's sst using a training dataset and
then running both on a different execution dataset. LBS
outperforms SP by 19.5\% on average, while allowing for
improved performance portability without requiring
tedious manual tuning. LBS also outperforms SP with
{\em sst=1}, its default value when undefined, by
56.7\%, and serializing work-stealing (SWS), another
work-stealer, by 54.7\%. Finally, compared to
serializing inner parallelism (SI) which has been used
by OpenMP, LBS is 54.2\% faster.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic scheduling; load balancing; nested
parallelism; thread scheduling; work stealing",
}
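
The lazy splitting rule replaces the manually tuned stop-splitting threshold
with a check that costs one deque inspection: split the remaining range only
when the local deque is empty, i.e. only when newly exposed work could
actually be stolen. A sequential, schematic Python sketch of that rule (the
real scheduler performs the check inside a work-stealing deque and only
every few iterations):

    from collections import deque

    def run_lazy_binary_splitting(lo, hi, body, local_deque):
        # Execute iterations [lo, hi), exposing work for thieves lazily.
        while True:
            while lo < hi:
                if hi - lo > 1 and not local_deque:
                    mid = (lo + hi) // 2
                    local_deque.append((mid, hi))  # expose the upper half
                    hi = mid
                body(lo)
                lo += 1
            if not local_deque:            # no deferred ranges remain
                return
            lo, hi = local_deque.pop()     # resume a half nobody stole

    out = []
    run_lazy_binary_splitting(0, 8, out.append, deque())
    assert sorted(out) == list(range(8))
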
@Article{Radojkovic:2010:TSB,
author = "Petar Radojkovi{\'c} and Vladimir {\v{C}}akarevi{\'c}
and Javier Verd{\'u} and Alex Pajuelo and Francisco
J. Cazorla and Mario Nemirovsky and Mateo Valero",
title = "Thread to strand binding of parallel network
applications in massive multi-threaded systems",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "191--202",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693480",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In processors with several levels of hardware resource
sharing, like CMPs in which each core is an SMT, the
scheduling process becomes more complex than in
processors with a single level of resource sharing,
such as pure-SMT or pure-CMP processors. Once the
operating system selects the set of applications to
simultaneously schedule on the processor (workload),
each application/thread must be assigned to one of the
hardware contexts (strands). We call this last
scheduling step the Thread to Strand Binding or TSB. In
this paper, we show that the TSB impact on the
performance of processors with several levels of shared
resources is high. We measure a variation of up to 59\%
between different TSBs of real multithreaded network
applications running on the UltraSPARC T2 processor
which has three levels of resource sharing. In our
view, this problem is going to be more acute in future
multithreaded architectures comprising more cores, more
contexts per core, and more levels of resource
sharing.\par
We propose a resource-sharing aware TSB algorithm
(TSBSched) that significantly facilitates the problem
of thread to strand binding for software-pipelined
applications, representative of multithreaded network
applications. Our systematic approach encapsulates
both the characteristics of the multithreaded
processors under study and the structure of the
software-pipelined applications. Once calibrated for a given
processor architecture, our proposal does not require
hardware knowledge on the side of the programmer, nor
extensive profiling of the application. We validate our
algorithm on the UltraSPARC T2 processor running a set
of real multithreaded network applications on which we
report improvements of up to 46\% compared to the
current state-of-the-art dynamic schedulers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "CMT; process scheduling; simultaneous multithreading;
UltraSPARC T2",
}
@Article{Zhang:2010:DCS,
author = "Eddy Z. Zhang and Yunlian Jiang and Xipeng Shen",
title = "Does cache sharing on modern {CMP} matter to the
performance of contemporary multithreaded programs?",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "203--212",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693482",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most modern Chip Multiprocessors (CMP) feature shared
cache on chip. For multithreaded applications, the
sharing reduces communication latency among co-running
threads, but also results in cache contention.\par
A number of studies have examined the influence of
cache sharing on multithreaded applications, but most
of them have concentrated on the design or management
of shared cache, rather than a systematic measurement
of the influence. Consequently, prior measurements have
been constrained by the reliance on simulators, the use
of out-of-date benchmarks, and the limited coverage of
deciding factors. The influence of CMP cache sharing on
contemporary multithreaded applications thus remains
only preliminarily understood.\par
In this work, we conduct a systematic measurement of
the influence on two kinds of commodity CMP machines,
using a recently released CMP benchmark suite, PARSEC,
with a number of potentially important factors on
program, OS, and architecture levels considered. The
measurement shows some surprising results. Contrary to
the commonly perceived importance of cache sharing, neither
positive nor negative effects from the cache sharing
are significant for most of the program executions,
regardless of the types of parallelism, input datasets,
architectures, numbers of threads, and assignments of
threads to cores. After a detailed analysis, we find
that the main reason is the mismatch between the current
development and compilation of multithreaded
applications and CMP architectures. By transforming the
programs in a cache-sharing-aware manner, we observe up
to 36\% performance increase when the threads are
placed on cores appropriately.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "chip multiprocessors; parallel program optimizations;
shared cache; thread scheduling",
}
@Article{Liu:2010:IPL,
author = "Lixia Liu and Zhiyuan Li",
title = "Improving parallelism and locality with asynchronous
algorithms",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "213--222",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693483",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As multicore chips become the main building blocks for
high performance computers, many numerical applications
face a performance impediment due to the limited
hardware capacity to move data between the CPU and the
off-chip memory. This is especially true for large
computing problems solved by iterative algorithms
because of the large data set typically used. Loop
tiling, also known as loop blocking, was shown
previously to be an effective way to enhance data
locality, and hence to reduce the memory bandwidth
pressure, for a class of iterative algorithms executed
on a single processor. Unfortunately, the tiled
programs suffer from reduced parallelism because only
the loop iterations within a single tile can be easily
parallelized. In this work, we propose to use the
asynchronous model to enable effective loop tiling such
that both parallelism and locality can be attained
simultaneously. Asynchronous algorithms were previously
proposed to reduce the communication cost and
synchronization overhead between processors. Our new
discovery is that carefully controlled asynchrony and
loop tiling can significantly improve the performance
of parallel iterative algorithms on multicore
processors due to simultaneously attained data locality
and loop-level parallelism. We present supporting
evidence from experiments with three well-known
numerical kernels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "asynchronous algorithms; data locality; loop tiling;
memory performance; parallel numerical programs",
}
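
The interplay of tiling and controlled asynchrony can be illustrated with a
Jacobi-style solver: each tile performs several local sweeps against a stale
snapshot of the other tiles (the time-tiled, cache-friendly part) and
publishes its values only at the end of the round (the relaxed
synchronization). A sequential NumPy sketch under these assumptions; the
paper runs the tiles on separate cores.

    import numpy as np

    def async_tiled_jacobi(A, b, x, tile, local_sweeps, rounds):
        # Per round: every tile runs local_sweeps Jacobi updates of its
        # own unknowns while reading a stale snapshot of the other
        # tiles, then all tiles publish at once.
        n, D = len(b), np.diag(A).copy()
        for _ in range(rounds):
            snapshot = x.copy()
            for start in range(0, n, tile):
                t = slice(start, min(start + tile, n))
                local = snapshot.copy()
                for _ in range(local_sweeps):
                    sigma = A[t].dot(local) - D[t] * local[t]
                    local[t] = (b[t] - sigma) / D[t]
                x[t] = local[t]
        return x

    # Diagonally dominant test system: converges despite stale reads.
    rng = np.random.default_rng(1)
    A = rng.random((16, 16)) + 16.0 * np.eye(16)
    b = rng.random(16)
    x = async_tiled_jacobi(A, b, np.zeros(16), tile=4,
                           local_sweeps=4, rounds=60)
    assert np.allclose(A.dot(x), b, atol=1e-6)
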
@Article{Castaldo:2010:SLP,
author = "Anthony M. Castaldo and R. Clint Whaley",
title = "Scaling {LAPACK} panel operations using parallel cache
assignment",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "223--232",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693484",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In LAPACK many matrix operations are cast as block
algorithms which iteratively process a panel using an
unblocked algorithm and then update a remainder matrix
using the high performance Level 3 BLAS. The Level 3
BLAS have excellent weak scaling, but panel processing
tends to be bus bound, and thus scales with bus speed
rather than the number of processors ({\em p\/}).
Amdahl's law therefore ensures that as {\em p\/} grows,
the panel computation will become the dominant cost of
these LAPACK routines. Our contribution is a novel
parallel cache assignment approach which we show scales
well with {\em p}. We apply this general approach to
the QR and LU panel factorizations on two commodity
8-core platforms with very different cache structures,
and demonstrate superlinear panel factorization
speedups on both machines. Other approaches to this
problem demand complicated reformulations of the
computational approach, new kernels to be tuned, new
mathematics, an inflation of the high-order flop count,
and do not perform as well. By demonstrating a
straightforward alternative that avoids all of these
contortions and scales with {\em p}, we address a
critical stumbling block for dense linear algebra in
the age of massive parallelism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "Atlas; factorization; GPU; LAPACK; LU; multicore;
multi-core; parallel; QR",
}
@Article{Sutherland:2010:CTC,
author = "Dean F. Sutherland and William L. Scherlis",
title = "Composable thread coloring",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "233--244",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693485",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces the language-independent concept
of ``thread usage policy.'' Many multi-threaded
software systems contain policies that regulate
associations among threads, executable code, and
potentially shared state. A system, for example, may
constrain which threads are permitted to execute
particular code segments, usually as a means to
constrain those threads from accessing or writing
particular elements of state. These policies ensure
properties such as state confinement or reader/writer
constraints, often without recourse to locking or
transaction discipline.\par
Our approach allows developers to concisely document
their thread usage policies in a manner that enables
the use of sound scalable analysis to assess
consistency of policy and as-written code. This paper
identifies the key semantic concepts of our thread
coloring language and illustrates how to use its
succinct source-level annotations to express models of
thread usage policies, following established annotation
conventions for Java.\par
We have built a prototype static analysis tool,
implemented as an integrated development environment
plug-in (for the Eclipse IDE), that notifies developers
of discrepancies between policy annotations and
as-written code. Our analysis technique uses several
underlying algorithms based on abstract interpretation,
call-graphs, and type inference. The resulting overall
analysis is both sound and composable. We have used
this prototype analysis tool in case studies to model
and analyze more than a million lines of code.\par
Our validation process included field trials on a wide
variety of complex large-scale production code selected
by the host organizations. Our in-field experience led
us to focus on potential adoptability by real-world
developers. We have developed techniques that can
reduce annotation density to less than one line per
thousand lines of code (KLOC). In addition, the
prototype analysis tool supports an incremental and
iterative approach to modeling and analysis. This
approach enabled field trial partners to directly
target areas of greatest concern and to achieve useful
results within a few hours.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords =     "annotation; Java; state consistency; multicore; race
conditions; state confinement; thread policy",
}
@Article{Agrawal:2010:HLF,
author = "Kunal Agrawal and Charles E. Leiserson and Jim
Sukha",
title = "Helper locks for fork-join parallel programming",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "245--256",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693487",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Helper locks allow programs with large parallel
critical sections, called parallel regions, to execute
more efficiently by enlisting processors that might
otherwise be waiting on the helper lock to aid in the
execution of the parallel region. Suppose that a
processor {\em p\/} is executing a parallel region {\em
A\/} after having acquired the lock {\em L\/}
protecting {\em A}. If another processor {\em p\/}$'$
tries to acquire {\em L}, then instead of blocking and
waiting for {\em p\/} to complete {\em A}, processor
{\em p\/}$'$ joins {\em p\/} to help it
complete {\em A}. Additional processors not blocked on
{\em L\/} may also help to execute {\em A}.\par
The HELPER runtime system can execute fork-join
computations augmented with helper locks and parallel
regions. HELPER supports the unbounded nesting of
parallel regions. We provide theoretical
completion-time and space-usage bounds for a design of
HELPER based on work stealing. Specifically, let {\em
V\/} be the number of parallel regions in a
computation, let {\em T\/}$_1$ be its work, and let
{\em T\/}$_\infty$ be its ``aggregate span'' --- the sum
of the spans (critical-path lengths) of all its
parallel regions. We prove that HELPER completes the
computation in expected time $O(T_1 / P + T_\infty +
PV)$ on {\em P\/} processors. This bound indicates that
programs
with a small number of highly parallel critical
sections can attain linear speedup. For the space
bound, we prove that HELPER completes a program using
only $O(P S_1)$ stack space, where $S_1$ is the sum,
over all regions, of the stack space used by each
region in a serial execution. Finally, we describe a
prototype of HELPER implemented by modifying the Cilk
multithreaded runtime system. We used this prototype to
implement a concurrent hash table with a resize
operation protected by a helper lock.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "Cilk; dynamic multithreading; helper lock; nested
parallelism; parallel region; scheduling; work
stealing",
}
@Article{Bronson:2010:PCB,
author = "Nathan G. Bronson and Jared Casper and Hassan Chafi
and Kunle Olukotun",
title = "A practical concurrent binary search tree",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "257--268",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693488",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a concurrent relaxed balance AVL tree
algorithm that is fast, scales well, and tolerates
contention. It is based on optimistic techniques
adapted from software transactional memory, but takes
advantage of specific knowledge of the algorithm to
reduce overheads and avoid unnecessary retries. We
extend our algorithm with a fast linearizable clone
operation, which can be used for consistent iteration
of the tree. Experimental evidence shows that our
algorithm outperforms a highly tuned concurrent skip
list for many access patterns, with an average of 39\%
higher single-threaded throughput and 32\% higher
multi-threaded throughput over a range of contention
levels and operation mixes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "optimistic concurrency; snapshot isolation",
}
@Article{Tallent:2010:ALC,
author = "Nathan R. Tallent and John M. Mellor-Crummey and Allan
Porterfield",
title = "Analyzing lock contention in multithreaded
applications",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "269--280",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693489",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many programs exploit shared-memory parallelism using
multithreading. Threaded codes typically use locks to
coordinate access to shared data. In many cases,
contention for locks reduces parallel efficiency and
hurts scalability. Being able to quantify and attribute
lock contention is important for understanding where a
multithreaded program needs improvement.\par
This paper proposes and evaluates three strategies for
gaining insight into performance losses due to lock
contention. First, we consider using a straightforward
strategy based on call stack profiling to attribute
idle time and show that it fails to yield insight into
lock contention. Second, we consider an approach that
builds on a strategy previously used for analyzing
idleness in work-stealing computations; we show that
this strategy does not yield insight into lock
contention. Finally, we propose a new technique for
measurement and analysis of lock contention that uses
data associated with locks to blame lock holders for
the idleness of spinning threads. Our approach incurs
$\leq$ 5\% overhead on a quantum chemistry application
that makes extensive use of locking (65M distinct
locks, a maximum of 340K live locks, and an average of
30K lock acquisitions per second per thread) and
attributes lock contention to its full static and
dynamic calling contexts. Our strategy, implemented in
HPCToolkit, is fully distributed and should scale well
to systems with large core counts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "HPCToolkit; lock contention; multithreading;
performance analysis",
}
@Article{Upadhyaya:2010:UDS,
author = "Gautam Upadhyaya and Samuel P. Midkiff and Vijay S.
Pai",
title = "Using data structure knowledge for efficient lock
generation and strong atomicity",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "281--292",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693490",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract =     "To achieve high performance on multicore systems,
shared-memory parallel languages must efficiently
implement atomic operations. The commonly used and
studied paradigms for atomicity are fine-grained
locking, which is both difficult to program and
error-prone; optimistic software transactions, which
require substantial overhead to detect and recover from
atomicity violations; and compiler-generation of locks
from programmer-specified atomic sections, which leads
to serialization whenever imprecise pointer analysis
suggests the mere possibility of a conflicting
operation. This paper presents a new strategy for
compiler-generated locking that uses data structure
knowledge to facilitate more precise alias and lock
generation analyses and reduce unnecessary
serialization. Implementing and evaluating these ideas
in the Java language shows that the new strategy
achieves eight-thread speedups of 0.83 to 5.9 for the
five STAMP benchmarks studied, outperforming software
transactions on all but one benchmark, and nearly
matching programmer-specified fine-grained locks on all
but one benchmark. The results also indicate that
compiler knowledge of data structures improves the
effectiveness of compiler analysis, boosting
eight-thread performance by up to 300\%. Further, the
new analysis allows for software support of strong
atomicity with less than 1\% overhead for two
benchmarks and less than 20\% for three others. The
strategy also nearly matches the performance of
programmer-specified fine-grained locks for the
SPECjbb2000 benchmark, which has traditionally not been
amenable to static analyses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "automatic lock generation; parallel programming;
transactional memory",
}
@Article{Ali:2010:MAC,
author = "Qasim Ali and Samuel Pratt Midkiff and Vijay S. Pai",
title = "Modeling advanced collective communication algorithms
on {Cell}-based systems",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "293--304",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693492",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents and validates performance models
for a variety of high-performance collective
communication algorithms for systems with Cell
processors. The systems modeled include a single Cell
processor, two Cell chips on a Cell Blade, and a
cluster of Cell Blades. The models extend PLogP, the
well-known point-to-point performance model, by
accounting for the unique hardware characteristics of
the Cell (e.g., heterogeneous interconnects and DMA
engines) and by applying the model to collective
communication. This paper also presents a
micro-benchmark suite to accurately measure the
extended PLogP parameters on the Cell Blade and then
uses these parameters to model different algorithms for
the {\em barrier, broadcast, reduce, all-reduce}, and
{\em all-gather\/} collective operations. Out of 425
total performance predictions, 398 see less than 10\%
error compared to the actual execution time, and all
see less than 15\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "algorithms; collective communication; modeling",
}
@Article{Zhai:2010:PPP,
author = "Jidong Zhai and Wenguang Chen and Weimin Zheng",
title = "{PHANTOM}: predicting performance of parallel
applications on large-scale parallel machines using a
single node",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "305--314",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693493",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For designers of large-scale parallel computers, it is
highly desirable that the performance of parallel
applications can be predicted at the design phase.
However, this is difficult because the execution time
of parallel applications is determined by several
factors, including sequential computation time in each
process, communication time and their convolution.
Despite previous efforts, it remains an open problem to
estimate sequential computation time in each process
accurately and efficiently for large-scale parallel
applications on non-existing target machines.\par
This paper proposes a novel approach to predict the
sequential computation time accurately and efficiently.
We assume that there is at least one node of the target
platform, but the whole target system need not be
available. We make two main technical contributions.
First, we employ deterministic replay techniques to
execute any process of a parallel application on a
single node at real speed. As a result, we can simply
measure the real sequential computation time on a
target node for each process one by one. Second, we
observe that computation behavior of processes in
parallel applications can be clustered into a few
groups while processes in each group have similar
computation behavior. This observation helps us reduce
measurement time significantly because we only need to
execute representative parallel processes instead of
all of them.\par
We have implemented a performance prediction framework,
called PHANTOM, which integrates the above
computation-time acquisition approach with a
trace-driven network simulator. We validate our
approach on several platforms. For ASCI Sweep3D, the
error of our approach is less than 5\% on 1024
processor cores. Compared to a recent regression-based
prediction approach, PHANTOM presents better prediction
accuracy across different platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "deterministic replay; parallel application;
performance prediction; trace-driven simulation",
}
@Article{Aleen:2010:IDD,
author = "Farhana Aleen and Monirul Sharif and Santosh Pande",
title = "Input-driven dynamic execution prediction of streaming
applications",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "315--324",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693494",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Streaming applications are promising targets for
effectively utilizing multicores because of their
inherent amenability to pipelined parallelism. While
existing methods of orchestrating streaming programs on
multicores have mostly been static, real-world
applications show ample variations in execution time
that may cause the achieved speedup and throughput to
be sub-optimal. One of the principal challenges for
moving towards dynamic orchestration has been the lack
of approaches that can predict or accurately estimate
upcoming dynamic variations in execution efficiently,
well before they occur.\par
In this paper, we propose an automated dynamic
execution behavior prediction approach that can be used
to efficiently estimate the time that will be spent in
different pipeline stages for upcoming inputs without
requiring program execution. This enables dynamic
balancing or scheduling of execution to achieve better
speedup. Our approach first uses dynamic taint analysis
to automatically generate an input-based execution
characterization of the streaming program, which
identifies the key control points where variation in
execution might occur with the associated input
elements that cause these variations. We then
automatically generate a light-weight emulator from the
program using this characterization that can simulate
the execution paths taken for new streaming inputs and
provide an estimate of execution time that will be
spent in processing these inputs, enabling prediction
of possible dynamic variations. We present experimental
evidence that our technique can accurately and
efficiently estimate execution behaviors for several
benchmarks. Our experiments show that dynamic
orchestration using our predicted execution behavior
can achieve considerably higher speedup than static
orchestration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic execution; parallelization; software
pipeline",
}
@Article{Lupei:2010:TST,
author = "Daniel Lupei and Bogdan Simion and Don Pinto and
Matthew Misler and Mihai Burcea and William Krick and
Cristiana Amza",
title = "Towards scalable and transparent parallelization of
multiplayer games using transactional memory support",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "325--326",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693496",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This work addresses the problem of parallelizing
multiplayer games using {\em software\/} Transactional
Memory (STM) support. Using a realistic high-impact
application, we show that STM provides not only ease of
programming, but also {\em better\/} performance than
that achievable with state-of-the-art lock-based
programming.\par
Towards this goal, we use SynQuake, a game benchmark
which extracts the main data structures and the
essential features of the popular multiplayer game
Quake, but can be driven with a synthetic workload
generator that flexibly emulates client game actions
and various hot-spot scenarios in the game
world.\par
We implement, evaluate and compare the STM version of
SynQuake with a state-of-the-art lock-based
parallelization of Quake, which we ported to SynQuake.
While in STM-SynQuake support for maintaining the
consistency of each potentially complex game action is
automatic, conservative locking of surrounding objects
within a bounding box for the duration of the game
action is inherently needed in lock-based SynQuake.
This leads to a higher scalability factor of
STM-SynQuake versus lock-based SynQuake, due to a
higher degree of false sharing in the latter.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "massively multiplayer games; scalability; software
transactional memory; synchronization",
}
@Article{Perarnau:2010:KRC,
author = "Swann Perarnau and Guillaume Huard",
title = "{KRASH}: reproducible {CPU} load generation on many
cores machines",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "327--328",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693497",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this article we present KRASH, a tool for
reproducible generation of system-level CPU load. This
tool is intended for use in shared memory machines
equipped with multiple CPU cores which are usually
exploited concurrently by several users. The objective
of KRASH is to enable parallel application developers
to validate their resource-use strategies on a
partially loaded machine by {\em replaying\/} an
observed load concurrently with their application. To
reach this objective, we present a method for CPU load
generation which behaves as realistically as possible:
the resulting load is similar to the load that would be
produced by concurrent processes run by other users.
Nevertheless, contrary to a simple run of a
CPU-intensive application, KRASH is not sensitive to
system scheduling decisions. The main benefit brought
by KRASH is this reproducibility: no matter how many
processes are present in the system, the load generated
by our tool strictly respects a given load profile. To
our knowledge, KRASH is the only tool that implements
the generation of a dynamic load profile (a load
varying with time). When used to generate a constant
load, KRASH's result is among the most realistic ones.
Furthermore, KRASH provides more flexibility than other
tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "CPU load generation; experimentation testbed; many
cores",
}
@Article{Muralidhara:2010:IAS,
author = "Sai Prashanth Muralidhara and Mahmut Kandemir and
Padma Raghavan",
title = "Intra-application shared cache partitioning for
multithreaded applications",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "329--330",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693498",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we address the problem of partitioning
a shared cache when the executing threads belong to the
same application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "cache; multicore; parallel applications",
}
@Article{Dash:2010:SPT,
author = "Alokika Dash and Brian Demsky",
title = "Symbolic prefetching in transactional distributed
shared memory",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "331--332",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693499",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a static analysis for the automatic
generation of symbolic prefetches in a transactional
distributed shared memory. A symbolic prefetch
specifies the first object to be prefetched followed by
a list of field offsets or array indices that define a
path through the heap. We also provide an object
caching framework and language extensions to support
our approach. To our knowledge, this is the first
prefetching approach that can prefetch objects whose
addresses have not been computed or predicted.\par
Our approach makes aggressive use of both prefetching
and caching of remote objects to hide network latency.
It relies on the transaction commit mechanism to
preserve the simple transactional consistency model
that we present to the developer. We have evaluated
this approach on several shared memory parallel
benchmarks and a distributed gaming benchmark to
observe speedups due to prefetching and caching.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "distributed shared memory; symbolic prefetching;
transactional memory",
}
@Article{Chakrabarti:2010:NAE,
author = "Dhruva R. Chakrabarti",
title = "New abstractions for effective performance analysis of
{STM} programs",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "333--334",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693500",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the design and implementation of a dynamic
conflict graph annotated with fine grain transaction
characteristics and show that this is important
information for effective performance analysis of a
software transactional memory (STM) program. We show
how to implement the necessary support in a compiler
and an STM with minimal perturbation of the original
behavior of the application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "concurrency; software transactional memory",
}
@Article{Zhang:2010:CSP,
author = "Chao Zhang and Chen Ding and Xiaoming Gu and Kirk
Kelsey and Tongxin Bai and Xiaobing Feng",
title = "Continuous speculative program parallelization in
software",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "335--336",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693501",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper addresses the problem of extracting
coarse-grained parallelism from large sequential code.
It builds on BOP, a system for software speculative
parallelization. BOP lets a user mark possibly
parallel regions (PPR) in a program and at run-time
speculatively executes PPR instances using Unix
processes. This short paper presents a new run-time
support called continuous speculation, which fully
utilizes available parallelism to tolerate differences
in PPR task size and processor speed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "software speculative parallelization",
}
@Article{Marjanovic:2010:ECC,
author = "Vladimir Marjanovic and Jes{\'u}s Labarta and Eduard
Ayguad{\'e} and Mateo Valero",
title = "Effective communication and computation overlap with
hybrid {MPI\slash SMPSs}",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "337--338",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693502",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Communication overhead is one of the dominant factors
affecting performance in high-performance computing
systems. To reduce the negative impact of
communication, programmers overlap communication and
computation by using asynchronous communication
primitives. This increases code complexity, requiring
more development effort and making programs less
readable. This paper presents the hybrid use of MPI and
SMPSs (SMP superscalar, a task-based shared-memory
programming model) that allows the programmer to easily
introduce the asynchrony necessary to overlap
communication and computation. We demonstrate the
hybrid use of MPI/SMPSs with the high-performance
LINPACK benchmark (HPL), and compare it to the pure MPI
implementation, which uses the look-ahead technique to
overlap communication and computation. The hybrid
MPI/SMPSs version significantly outperforms the pure
MPI version, getting close to
the asymptotic performance at medium problem sizes and
still getting significant benefits at small/large
problem sizes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "hybrid MPI/SMPSs; LINPACK; MPI; parallel programming
model",
}
@Article{Cederman:2010:SLF,
author = "Daniel Cederman and Philippas Tsigas",
title = "Supporting lock-free composition of concurrent data
objects",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "339--340",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693503",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Lock-free data objects offer several advantages over
their blocking counterparts, such as being immune to
deadlocks and convoying and, more importantly, being
highly concurrent. But they share a common disadvantage
in that the operations they provide are difficult to
compose into larger atomic operations while still
guaranteeing lock-freedom. We present a lock-free
methodology for composing highly concurrent
linearizable objects together by unifying their
linearization points. This makes it possible to
relatively easily introduce atomic lock-free move
operations to a wide range of concurrent objects.
Experimental evaluation has shown that the operations
originally supported by the data objects keep their
performance behavior under our methodology.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "composition; data structures; lock-free",
}
@Article{Guo:2010:SSL,
author =       "Yi Guo and Jisheng Zhao and Vincent Cav{\'e} and Vivek
Sarkar",
title = "{SLAW}: a scalable locality-aware adaptive
work-stealing scheduler for multi-core systems",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "341--342",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693504",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This poster introduces SLAW, a Scalable Locality-aware
Adaptive Work-stealing scheduler. SLAW features an
adaptive task-scheduling algorithm combined with a
locality-aware scheduling framework.\par
Past work has demonstrated the pros and cons of using
fixed scheduling policies, such as {\em work-first\/}
and {\em help-first}, in different cases without a
clear winner. Prior work also assumes the availability
and successful execution of a serial version of the
parallel program. This assumption can limit the
expressiveness of dynamic task parallel
languages.\par
The SLAW scheduler supports both work-first and
help-first policies simultaneously. It does so by using
an {\em adaptive\/} approach that selects a scheduling
policy on a per-task basis at runtime. The SLAW
scheduler also establishes bounds on the stack usage
and the heap space needed to store tasks. The
experimental results for the benchmarks studied show
that SLAW's adaptive scheduler achieves 0.98x - 9.2x
speedup over the help-first scheduler and 0.97x - 4.5x
speedup over the work-first scheduler for 64-thread
executions, thereby establishing the robustness of
using an adaptive approach instead of a fixed policy.
In contrast, the help-first policy is 9.2x slower than
work-first in the worst case for a fixed help-first
policy, and the work-first policy is 3.7x slower than
help-first in the worst case for a fixed work-first
policy. Further, for large irregular recursive parallel
computations, the adaptive scheduler runs with bounded
stack usage and achieves performance (and supports data
sizes) that cannot be delivered by the use of any
single fixed policy.\par
The SLAW scheduler is designed for programming models
where locality hints are provided to the runtime by the
programmer or compiler, and achieves {\em
locality-awareness\/} by grouping workers into {\em
places}. Locality awareness can lead to improved
performance by increasing temporal data reuse within a
worker and among workers in the same place. Our
experimental results show that locality-aware
scheduling can achieve up to 2.6x speedup over
locality-oblivious scheduling, for the benchmarks
studied.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "help-first; work-first; work-stealing",
}
@Article{Yang:2010:OCG,
author = "Yi Yang and Ping Xiang and Jingfei Kong and Huiyang
Zhou",
title = "An optimizing compiler for {GPGPU} programs with
input-data sharing",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "343--344",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693505",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Developing high performance GPGPU programs is
challenging for application developers since the
performance is dependent upon how well the code
leverages the hardware features of specific graphics
processors. To solve this problem and relieve
application developers of low-level hardware-specific
optimizations, we introduce a novel compiler to
optimize GPGPU programs. Our compiler takes as input a
naive GPU kernel function, which is functionally correct
but written without any consideration for performance
optimization.
The compiler then analyzes the code, identifies memory
access patterns, and generates optimized code. The
proposed compiler optimizations target one category
of scientific and media processing algorithms, which
has the characteristics of input-data sharing when
computing neighboring output pixels/elements. Many
commonly used algorithms, such as matrix
multiplication, convolution, etc., share such
characteristics. For these algorithms, novel approaches
are proposed to enforce memory coalescing and achieve
effective data reuse. Data prefetching and
hardware-specific tuning are also performed
automatically with our compiler framework. The
experimental results based on a set of applications
show that our compiler achieves very high performance,
either superior or very close to that of the highly
fine-tuned NVIDIA CUBLAS 2.1 library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compiler; GPGPU",
}
@Article{Chandramowlishwaran:2010:ACC,
author = "Aparna Chandramowlishwaran and Kathleen Knobe and
Richard Vuduc",
title = "Applying the concurrent collections programming model
to asynchronous parallel dense linear algebra",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "345--346",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693506",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This poster is a case study on the application of a
novel programming model, called Concurrent Collections
(CnC), to the implementation of an
asynchronous-parallel algorithm for computing the
Cholesky factorization of dense matrices. In CnC, the
programmer expresses her computation in terms of
application-specific operations, partially-ordered by
semantic scheduling constraints. We demonstrate the
performance potential of CnC in this poster, by showing
that our Cholesky implementation nearly matches or
exceeds competing vendor-tuned codes and alternative
programming models. We conclude that the CnC model is
well-suited for expressing asynchronous-parallel
algorithms on emerging multicore systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "asynchronous algorithms; concurrent collections; dense
linear algebra",
}
@Article{Hoffmann:2010:AHS,
author = "Henry Hoffmann and Jonathan Eastep and Marco D.
Santambrogio and Jason E. Miller and Anant Agarwal",
title = "Application heartbeats for software performance and
health",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "347--348",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693507",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Adaptive, or self-aware, computing has been proposed
to help application programmers confront the growing
complexity of multicore software development. However,
existing approaches to adaptive systems are largely ad
hoc and often do not manage to incorporate the true
performance goals of the applications they are designed
to support. This paper presents an enabling technology
for adaptive computing systems: Application Heartbeats.
The Application Heartbeats framework provides a simple,
standard programming interface that applications can
use to indicate their performance and system software
(and hardware) can use to query an application's
performance. The PARSEC benchmark suite is instrumented
with Application Heartbeats to show the broad
applicability of the interface and an external resource
scheduler demonstrates the use of the interface by
assigning cores to an application to maintain a
designated performance goal.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "adaptive algorithms",
}
@Article{Porter:2010:MTM,
author = "Donald E. Porter and Emmett Witchel",
title = "Modeling transactional memory workload performance",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "349--350",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693508",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactional memory promises to make parallel
programming easier than with fine-grained locking,
while performing just as well. This performance claim
is not always borne out because an application may
violate a common-case assumption of the TM designer or
because of external system effects. In order to help
programmers assess the suitability of their code for
transactional memory, this work introduces a formal
model of transactional memory as well as a tool, called
Syncchar. Syncchar can predict the speedup of a
conversion from locks to transactions within 25\% for
the STAMP benchmarks. Because getting good performance
from transactions is more difficult than commonly
appreciated, developers need tools to tune
transactional performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "performance; Syncchar; transactional memory",
}
@Article{Carter:2010:PLN,
author = "John D. Carter and William B. Gardner and Gary
Grewal",
title = "The {Pilot} library for novice {MPI} programmers",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "351--352",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693509",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Pilot library is a new method for programming
MPI-enabled clusters in C, targeted at novice parallel
programmers. Formal elements from Communicating
Sequential Processes (CSP) are used to realize a
process/channel model of parallel computation that
reduces opportunities for deadlock and other
communication errors. This simple model, plus an
application programming interface (API) styled after
C's formatted I/O, are designed to make the library
easy to learn. The Pilot library exists as a thin layer
on top of any standard Message Passing Interface (MPI)
implementation, preserving MPI's portability and
efficiency, with little performance overhead arising as
a result of Pilot's additional features.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "C; cluster programming; collective operations;
deadlock detection; high-performance computing; MPI",
}
@Article{Jang:2010:DTE,
author = "Byunghyun Jang and Perhaad Mistry and Dana Schaa and
Rodrigo Dominguez and David Kaeli",
title = "Data transformations enabling loop vectorization on
multithreaded data parallel architectures",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "353--354",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693510",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Loop vectorization, a key feature exploited to obtain
high performance on Single Instruction Multiple Data
(SIMD) vector architectures, is significantly hindered
by irregular memory access patterns in the data stream.
This paper describes data transformations that allow us
to vectorize loops targeting massively multithreaded
data parallel architectures. We present a mathematical
model that captures loop-based memory access patterns
and computes the most appropriate data transformations
in order to enable vectorization. Our experimental
results show that the proposed data transformations can
significantly increase the number of loops that can be
vectorized and enhance the data-level parallelism of
applications. Our results also show that the overhead
associated with our data transformations can be easily
amortized as the size of the input data set increases.
For the set of high performance benchmark kernels
studied, we achieve consistent and significant
performance improvements (up to 11.4X) by applying
vectorization using our data transformation approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "data transformation; GPGPU; loop vectorization",
}
@Article{Buehrer:2010:DPS,
author = "Gregory Buehrer and Srinivasan Parthasarathy and
Shirish Tatikonda",
title = "A distributed placement service for graph-structured
and tree-structured data",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "355--356",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693511",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Effective data placement strategies can enhance the
performance of data-intensive applications implemented
on high end computing clusters. Such strategies can
have a significant impact in localizing the
computation, in minimizing synchronization
(communication) costs, in enhancing reliability (via
strategic replication policies), and in ensuring a
balanced workload or enhancing the available bandwidth
from massive storage devices (e.g. disk
arrays).\par
Existing work has largely targeted the placement of
relatively simple data types or entities (e.g.
elements, vectors, sets, and arrays). Here we
investigate several hash-based distributed data
placement methods targeting tree- and graph- structured
data, and develop a locality enhancing placement
service for large cluster systems. Target applications
include the placement of a single large graph (e.g. Web
graph), a single large tree (e.g. large XML file), a
forest of graphs or trees (e.g. XML database) and other
specialized graph data types --- bipartite graphs
(e.g. query-click graphs), directed acyclic graphs,
etc. We empirically
evaluate our service by demonstrating its use in
improving mining executions for pattern discovery,
nearest neighbor searching, graph computations, and
applications that combine link and content analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "data placement; distributed computing; structured
data",
}
@Article{Li:2010:SVC,
author = "Guodong Li and Ganesh Gopalakrishnan and Robert M.
Kirby and Dan Quinlan",
title = "A symbolic verifier for {CUDA} programs",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "357--358",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693512",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a preliminary automated verifier based on
mechanical decision procedures which is able to prove
functional correctness of CUDA programs and guarantee
to detect bugs such as race conditions. We also employ
a symbolic partial order reduction (POR) technique to
mitigate the interleaving explosion problem.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "cuda; formal verification; SPMD; symbolic analysis",
}
@Article{Richards:2010:ADB,
author = "Gregor Richards and Sylvain Lebresne and Brian Burg
and Jan Vitek",
title = "An analysis of the dynamic behavior of {JavaScript}
programs",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "1--12",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806598",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The JavaScript programming language is widely used for
web programming and, increasingly, for general purpose
computing. As such, improving the correctness, security
and performance of JavaScript applications has been the
driving force for research in type systems, static
analysis and compiler techniques for this language.
Many of these techniques aim to rein in some of the
most dynamic features of the language, yet little seems
to be known about how programmers actually utilize the
language or these features. In this paper we perform an
empirical study of the dynamic behavior of a corpus of
widely-used JavaScript programs, and analyze how and
why the dynamic features are used. We report on the
degree of dynamism that is exhibited by these
JavaScript programs and compare that with assumptions
commonly made in the literature and accepted industry
benchmark suites.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic behavior; dynamic metrics; execution tracing;
javascript; program analysis",
}
@Article{Bond:2010:BEC,
author = "Michael D. Bond and Graham Z. Baker and Samuel Z.
Guyer",
title = "{Breadcrumbs}: efficient context sensitivity for
dynamic bug detection analyses",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "13--24",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806599",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract =     "Calling context --- the set of active methods on the
stack --- is critical for understanding the dynamic
behavior of large programs. Dynamic program analysis
tools, however, are almost exclusively context
insensitive because of the prohibitive cost of
representing calling contexts at run time. Deployable
dynamic analyses, in particular, have been limited to
reporting only static program locations.\par
This paper presents Breadcrumbs, an efficient technique
for recording and reporting dynamic calling contexts.
It builds on an existing technique for computing a
compact (one word) encoding of each calling context
that client analyses can use in place of a program
location. The key feature of our system is a search
algorithm that can reconstruct a calling context from
its encoding using only a static call graph and a small
amount of dynamic information collected at cold
(infrequently executed) callsites. Breadcrumbs requires
no offline training or program modifications, and
handles all language features, including dynamic class
loading.\par
We use Breadcrumbs to add context sensitivity to two
dynamic analyses: a data-race detector and an analysis
for diagnosing null pointer exceptions. On average, it
adds 10\% to 20\% runtime overhead, depending on a
tunable parameter that controls how much dynamic
information is collected. Collecting less information
lowers the overhead, but can result in a search space
explosion. In some cases this causes reconstruction to
fail, but in most cases Breadcrumbs produces
non-trivial calling contexts that have the potential to
significantly improve both the precision of the
analyses and the quality of the bug reports.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "bug detection; context sensitivity; dynamic analysis",
}
@Article{Ruwase:2010:DLE,
author = "Olatunji Ruwase and Shimin Chen and Phillip B. Gibbons
and Todd C. Mowry",
title = "Decoupled lifeguards: enabling path optimizations for
dynamic correctness checking tools",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "25--35",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806600",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic correctness checking tools (a.k.a. lifeguards)
can detect a wide array of correctness issues, such as
memory, security, and concurrency misbehavior, in
unmodified executables at run time. However, lifeguards
that are implemented using dynamic binary
instrumentation (DBI) often slow down the monitored
application by 10-50X, while proposals that replace DBI
with hardware still see 3-8X slowdowns. The remaining
overhead is the cost of performing the lifeguard
analysis itself. In this paper, we explore compiler
optimization techniques to reduce this
overhead.\par
The lifeguard software is typically structured as a set
of event-driven handlers, where the events are
individual instructions in the monitored application's
dynamic instruction stream. We propose to {\em
decouple\/} the lifeguard checking code from the
application that it is monitoring so that the lifeguard
analysis can be invoked at the granularity of {\em hot
paths\/} in the monitored application. In this way, we
are able to find many more opportunities for
eliminating redundant work in the lifeguard analysis,
even starting with well-optimized applications and
hand-tuned lifeguard handlers. Experimental results
with two lifeguard frameworks - one DBI-based and one
hardware-assisted - show significant reduction in
monitoring overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic code optimization; dynamic correctness
checking; dynamic program analysis",
}
@Article{Lee:2010:JSD,
author = "Byeongcheol Lee and Ben Wiedermann and Martin Hirzel
and Robert Grimm and Kathryn S. McKinley",
title = "{Jinn}: synthesizing dynamic bug detectors for foreign
language interfaces",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "36--49",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806601",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming language specifications mandate static and
dynamic analyses to preclude syntactic and semantic
errors. Although individual languages are usually
well-specified, composing languages is not, and this
poor specification is a source of many errors in {\em
multilingual\/} programs. For example, virtually all
Java programs compose Java and C using the Java Native
Interface (JNI). Since JNI is informally specified,
developers have difficulty using it correctly, and
current Java compilers and virtual machines (VMs)
inconsistently check only a subset of JNI
constraints.\par
This paper's most significant contribution is to show
how to synthesize dynamic analyses from state machines
to detect foreign function interface (FFI) violations.
We identify three classes of FFI constraints encoded by
eleven state machines that capture thousands of JNI and
Python/C FFI rules. We use a mapping function to
specify which state machines, transitions, and program
entities (threads, objects, references) to check at
each FFI call and return. From this function, we
synthesize a context-specific dynamic analysis to find
FFI bugs. We build bug detection tools for JNI and
Python/C using this approach. For JNI, we dynamically
and transparently interpose the analysis on Java and C
language transitions through the JVM tools interface.
The resulting tool, called Jinn, is compiler and
virtual machine {\em independent}. It detects and
diagnoses a wide variety of FFI bugs that other tools
miss. This approach greatly reduces the annotation
burden by exploiting common FFI constraints: whereas
the generated Jinn code is 22,000+ lines, we wrote only
1,400 lines of state machine and mapping code. Overall,
this paper lays the foundation for a more principled
approach to developing correct multilingual software
and a more concise and automated approach to FFI
specification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic analysis; ffi bugs; foreign function
interfaces (FFI); java native interface (jni);
multilingual programs; python/C; specification;
specification generation",
}
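As a rough illustration of the state-machine checking that Jinn synthesizes, the sketch below hand-codes one tiny FFI-style rule as an explicit state machine: a foreign handle must be acquired before use and never touched after release. The states, events, and rule are invented for illustration; the point of the paper is that such checkers are generated, not written by hand.

    // A hand-written instance of the kind of state-machine check that
    // Jinn generates automatically (names and rule are illustrative).
    class HandleStateMachine {
        enum State { UNACQUIRED, LIVE, RELEASED }
        enum Event { ACQUIRE, USE, RELEASE }

        private State state = State.UNACQUIRED;

        void step(Event e) {
            switch (state) {
                case UNACQUIRED:
                    if (e == Event.ACQUIRE) { state = State.LIVE; return; }
                    break;
                case LIVE:
                    if (e == Event.USE) return;
                    if (e == Event.RELEASE) { state = State.RELEASED; return; }
                    break;
                case RELEASED:
                    break; // no event is legal after release
            }
            throw new IllegalStateException(
                "FFI rule violated: event " + e + " in state " + state);
        }
    }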
@Article{Prabhu:2010:SPS,
author = "Prakash Prabhu and Ganesan Ramalingam and Kapil
Vaswani",
title = "Safe programmable speculative parallelism",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "50--61",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806603",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Execution order constraints imposed by dependences can
serialize computation, preventing parallelization of
code and algorithms. Speculating on the value(s)
carried by dependences is one way to break such
critical dependences. Value speculation has been used
effectively at a low level, by compilers and hardware.
In this paper, we focus on the use of speculation {\em
by programmers\/} as an algorithmic paradigm to
parallelize seemingly sequential code.\par
We propose two new language constructs, {\em
speculative composition\/} and {\em speculative
iteration}. These constructs enable programmers to
declaratively express speculative parallelism in
programs: to indicate when and how to speculate,
increasing the parallelism in the program, without
concerning themselves with mundane implementation
details.\par
We present a core language with speculation constructs
and mutable state and present a formal operational
semantics for the language. We use the semantics to
define the notion of a correct speculative execution as
one that is equivalent to a non-speculative execution.
In general, speculation requires a runtime mechanism to
undo the effects of speculative computation in the case
of mispredictions. We describe a set of conditions
under which such rollback can be avoided. We present a
static analysis that checks if a given program
satisfies these conditions. This allows us to implement
speculation efficiently, without the overhead required
for rollbacks.\par
We have implemented the speculation constructs as a C\#
library, along with the static checker for safety. We
present an empirical evaluation of the efficacy of this
approach to parallelization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "purity; rollback freedom; safety; speculative
parallelism; value speculation",
}
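The speculative-composition construct can be approximated in plain Java, which makes the execution model concrete. The sketch below is a loose analogue, not the paper's C# library: it runs g on a predicted value of f's result in parallel with f itself, keeps the speculative result on a correct prediction, and re-executes g otherwise; rollback is unnecessary here only because g is assumed pure.

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.function.Function;
    import java.util.function.Supplier;

    class Speculation {
        // Compute g(f()) while speculating that f() equals predict().
        static <A, B> B compose(Supplier<A> f, Supplier<A> predict,
                                Function<A, B> g) throws Exception {
            ExecutorService pool = Executors.newFixedThreadPool(1);
            try {
                A guess = predict.get();
                Future<B> speculative = pool.submit(() -> g.apply(guess));
                A actual = f.get();               // runs concurrently with g
                if (actual.equals(guess)) {
                    return speculative.get();     // prediction held: keep work
                }
                speculative.cancel(true);         // misprediction: discard
                return g.apply(actual);
            } finally {
                pool.shutdown();
            }
        }
    }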
@Article{Tian:2010:SSP,
author = "Chen Tian and Min Feng and Rajiv Gupta",
title = "Supporting speculative parallelization in the presence
of dynamic data structures",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "62--73",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806604",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The availability of multicore processors has led to
significant interest in compiler techniques for
speculative parallelization of sequential programs.
Isolation of speculative state from non-speculative
state forms the basis of such speculative techniques as
this separation enables recovery from misspeculations.
In our prior work on CorD [35,36], we showed that, for
programs based on arrays and scalar variables, the
copying of data between speculative and non-speculative
memory can be highly optimized to support state
separation, yielding significant speedups on multicore
machines available today. However, we observe that in
the context of
heap-intensive programs that operate on linked dynamic
data structures, state separation based speculative
parallelization poses many challenges. The copying of
data structures from non-speculative to speculative
state (copy-in operation) can be very expensive due to
the large sizes of dynamic data structures. The copying
of updated data structures from speculative state to
non-speculative state (copy-out operation) is made
complex due to the changes in the shape and size of the
dynamic data structure made by the speculative
computation. In addition, we must contend with the need
to translate pointers internal to dynamic data
structures between their non-speculative and
speculative memory addresses. In this paper we develop
an augmented design for the representation of dynamic
data structures such that all of the above operations
can be performed efficiently. Our experiments
demonstrate significant speedups on a real machine for
a set of programs that make extensive use of heap based
dynamic data structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "multicore processors; speculative parallelization",
}
@Article{Kandemir:2010:CTA,
author = "Mahmut Kandemir and Taylan Yemliha and SaiPrashanth
Muralidhara and Shekhar Srikantaiah and Mary Jane Irwin
and Yuanrui Zhang",
title = "Cache topology aware computation mapping for
multicores",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "74--85",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806605",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The main contribution of this paper is a compiler
based, cache topology aware code optimization scheme
for emerging multicore systems. This scheme distributes
the iterations of a loop to be executed in parallel
across the cores of a target multicore machine and
schedules the iterations assigned to each core. Our
goal is to improve the utilization of the on-chip
multi-layer cache hierarchy and to maximize overall
application performance. We evaluate our cache topology
aware approach using a set of twelve applications and
three different commercial multicore machines. In
addition, to study some of our experimental parameters
in detail and to explore future multicore machines
(with higher core counts and deeper on-chip cache
hierarchies), we also conduct a simulation based study.
The results collected from our experiments with three
Intel multicore machines show that the proposed
compiler-based approach is very effective in enhancing
performance. In addition, our simulation results
indicate that optimizing for the on-chip cache
hierarchy will be even more important in future
multicores with increasing numbers of cores and cache
levels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "cache; compiler; multicore; multi-level;
topology-aware",
}
@Article{Yang:2010:GCM,
author = "Yi Yang and Ping Xiang and Jingfei Kong and Huiyang
Zhou",
title = "A {GPGPU} compiler for memory optimization and
parallelism management",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "86--97",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806606",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a novel optimizing compiler for
general purpose computation on graphics processing
units (GPGPU). It addresses two major challenges of
developing high performance GPGPU programs: effective
utilization of GPU memory hierarchy and judicious
management of parallelism.\par
The input to our compiler is a na{\"\i}ve GPU kernel
function, which is functionally correct but without any
consideration for performance optimization. The
compiler analyzes the code, identifies its memory
access patterns, and generates both the optimized
kernel and the kernel invocation parameters. Our
optimization process includes vectorization and memory
coalescing for memory bandwidth enhancement, tiling and
unrolling for data reuse and parallelism management,
and thread block remapping or address-offset insertion
for partition-camping elimination. The experiments on a
set of scientific and media processing algorithms show
that our optimized code achieves very high performance,
either superior or very close to the highly fine-tuned
library, NVIDIA CUBLAS 2.2, and speedups of up to 128
times over the na{\"\i}ve versions. Another
distinguishing feature of our compiler is the
understandability of the optimized code, which is
useful for performance analysis and algorithm
refinement.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compiler; gpgpu",
}
@Article{Eggers:2010:AL,
author = "Susan Eggers",
title = "{2010 Athena} lecture",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "98--98",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806608",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Susan Eggers, a Professor of Computer Science and
Engineering at the University of Washington, joined her
department in 1989. She received a B.A. in 1965 from
Connecticut College and a Ph.D. in 1989 from the
University of California, Berkeley. Her research
interests are in computer architecture and back-end
compiler optimization, with an emphasis on experimental
performance analysis. With her colleague Hank Levy and
their students, she developed the first commercially
viable multithreaded architecture, Simultaneous
Multithreading, adopted by Intel (as Hyperthreading),
IBM, Sun and others. Her current research is in the
areas of distributed dataflow machines, FPGAs and chip
multiprocessors. In 1989 Professor Eggers was awarded
an IBM Faculty Development Award, in 1990 an NSF
Presidential Young Investigator Award, in 1994 the
Microsoft Professorship in Computer Science and
Engineering, and in 2009 the ACM-W Athena Lecturer. She
is a Fellow of the ACM and IEEE, a Fellow of the AAAS,
and a member of the National Academy of Engineering.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "invited talk",
}
@Article{Yang:2010:SLI,
author = "Jean Yang and Chris Hawblitzel",
title = "Safe to the last instruction: automated verification
of a type-safe operating system",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "99--110",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806610",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Typed assembly language (TAL) and Hoare logic can
verify the absence of many kinds of errors in low-level
code. We use TAL and Hoare logic to achieve highly
automated, static verification of the safety of a new
operating system called Verve. Our techniques and tools
mechanically verify the safety of every assembly
language instruction in the operating system, run-time
system, drivers, and applications (in fact, every part
of the system software except the boot loader). Verve
consists of a 'Nucleus' that provides primitive access
to hardware and memory, a kernel that builds services
on top of the Nucleus, and applications that run on top
of the kernel. The Nucleus, written in verified
assembly language, implements allocation, garbage
collection, multiple stacks, interrupt handling, and
device access. The kernel, written in C\# and compiled
to TAL, builds higher-level services, such as
preemptive threads, on top of the Nucleus. A TAL
checker verifies the safety of the kernel and
applications. A Hoare-style verifier with an automated
theorem prover verifies both the safety and correctness
of the Nucleus. Verve is, to the best of our knowledge,
the first operating system mechanically verified to
guarantee both type and memory safety. More generally,
Verve's approach demonstrates a practical way to mix
high-level typed code with low-level untyped code in a
verifiably safe manner.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "operating system; run-time system; type safety;
verification",
}
@Article{Tatlock:2010:BEV,
author = "Zachary Tatlock and Sorin Lerner",
title = "Bringing extensibility to verified compilers",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "111--121",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806611",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Verified compilers, such as Leroy's CompCert, are
accompanied by a fully checked correctness proof. Both
the compiler and proof are often constructed with an
interactive proof assistant. This technique provides a
strong, end-to-end correctness guarantee on top of a
small trusted computing base. Unfortunately, these
compilers are also challenging to extend since each
additional transformation must be proven correct in
full formal detail.\par
At the other end of the spectrum, techniques for
compiler correctness based on a domain-specific
language for writing optimizations, such as Lerner's
Rhodium and Cobalt, make the compiler easy to extend:
the correctness of additional transformations can be
checked completely automatically. Unfortunately, these
systems provide a weaker guarantee since their
end-to-end correctness has not been proven fully
formally.\par
We present an approach for compiler correctness that
provides the best of both worlds by bridging the gap
between compiler verification and compiler
extensibility. In particular, we have extended Leroy's
CompCert compiler with an execution engine for
optimizations written in a domain-specific language and
proved
that this execution engine preserves program semantics,
using the Coq proof assistant. We present our CompCert
extension, XCert, including the details of its
execution engine and proof of correctness in Coq.
Furthermore, we report on the important lessons learned
for making the proof development manageable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compiler optimization; correctness; extensibility",
}
@Article{Chlipala:2010:UST,
author = "Adam Chlipala",
title = "{Ur}: statically-typed metaprogramming with type-level
record computation",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "122--133",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806612",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "{\em Dependent types\/} provide a strong foundation
for specifying and verifying rich properties of
programs through type-checking. The earliest
implementations combined dependency, which allows types
to mention program variables, with type-level
computation, which facilitates expressive
specifications that compute with recursive functions
over types. While many recent applications of dependent
types omit the latter facility, we argue in this paper
that it deserves more attention, even when implemented
without dependency.\par
In particular, the ability to use functional programs
as specifications enables {\em statically-typed
metaprogramming\/}: programs write programs, and static
type-checking guarantees that the generating process
never produces invalid code. Since our focus is on
generic validity properties rather than full
correctness verification, it is possible to engineer
type inference systems that are very effective in
narrow domains. As a demonstration, we present Ur, a
programming language designed to facilitate
metaprogramming with first-class records and names. On
top of Ur, we implement Ur/Web, a special standard
library that enables the development of modern Web
applications. Ad-hoc code generation is already in wide
use in the popular Web application frameworks, and we
show how that generation may be tamed using types,
without forcing metaprogram authors to write proofs or
forcing metaprogram users to write any fancy types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dependent types; metaprogramming",
}
@Article{Emmi:2010:PVT,
author = "Michael Emmi and Rupak Majumdar and Roman Manevich",
title = "Parameterized verification of transactional memories",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "134--145",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806613",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe an automatic verification method to check
whether transactional memories ensure strict
serializability, a key property assumed of the
transactional interface. Our main contribution is a
technique for effectively verifying parameterized
systems. The technique merges ideas from parameterized
hardware and protocol verification (verification by
invisible invariants and symmetry reduction) with ideas
from software verification (template-based invariant
generation and satisfiability checking for quantified
formul{\ae} modulo theories). The combination
enables us to precisely model and analyze unbounded
systems while taming state explosion.\par
Our technique enables automated proofs that two-phase
locking (TPL), dynamic software transactional memory
(DSTM), and transactional locking II (TL2) systems
ensure strict serializability. The verification is
challenging since the systems are unbounded in several
dimensions: the number and length of concurrently
executing transactions, and the size of the shared
memory they access, have no finite limit. In contrast,
state-of-the-art software model checking tools such as
BLAST and TVLA are unable to validate either system,
due to inherent expressiveness limitations or state
explosion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "parameterized verification; transactional memory",
}
@Article{Pizlo:2010:SFT,
author = "Filip Pizlo and Lukasz Ziarek and Petr Maj and Antony
L. Hosking and Ethan Blanton and Jan Vitek",
title = "{SCHISM}: fragmentation-tolerant real-time garbage
collection",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "146--159",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806615",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Managed languages such as Java and C\# are being
considered for use in hard real-time systems. A hurdle
to their widespread adoption is the lack of garbage
collection algorithms that offer predictable
space-and-time performance in the face of
fragmentation. We introduce SCHISM/CMR, a new
concurrent and real-time garbage collector that is
fragmentation tolerant and guarantees time-and-space
worst-case bounds while providing good throughput.
SCHISM/CMR combines mark-region collection of
fragmented objects and arrays (arraylets) with separate
replication-copying collection of immutable arraylet
spines, so as to cope with external fragmentation when
running in small heaps. We present an implementation of
SCHISM/CMR in the Fiji VM, a high-performance Java
virtual machine for mission-critical systems, along
with a thorough experimental evaluation on a wide
variety of architectures, including server-class and
embedded systems. The results show that SCHISM/CMR
tolerates fragmentation better than previous schemes,
with a much more acceptable throughput penalty.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "fragmentation; mark-region; mark-sweep; real-time;
replication-copying",
}
@Article{Xu:2010:DIU,
author = "Guoqing Xu and Atanas Rountev",
title = "Detecting inefficiently-used containers to avoid
bloat",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "160--173",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806616",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Runtime bloat significantly degrades the performance
and scalability of software systems. An important
source of bloat is the inefficient use of containers.
It is expensive to create inefficiently-used containers
and to invoke their associated methods, as this may
ultimately execute large volumes of code, with call
stacks dozens deep, and allocate many temporary
objects.\par
This paper presents practical static and dynamic tools
that can find inappropriate use of containers in Java
programs. At the core of these tools is a base static
analysis that identifies, for each container, the
objects that are added to this container and the key
statements (i.e., heap loads and stores) that achieve
the semantics of common container operations such as
{\em ADD\/} and {\em GET}. The static tool finds
problematic uses of containers by considering the
nesting relationships among the loops where these {\em
semantics-achieving statements\/} are located, while
the dynamic tool can instrument these statements and
find inefficiencies by profiling their execution
frequencies.\par
The high precision of the base analysis is achieved by
taking advantage of a context-free language
(CFL)-reachability formulation of points-to analysis
and by accounting for container-specific properties. It
is demand-driven and client-driven, facilitating
refinement specific to each queried container object
and increasing scalability. The tools built with the
help of this analysis can be used both to avoid the
creation of container-related performance problems
early during development, and to help with diagnosis
when problems are observed during tuning. Our
experimental results show that the static tool has a
low false positive rate and produces more relevant
information than its dynamic counterpart. Further case
studies suggest that significant optimization
opportunities can be found by focusing on
statically-identified containers for which high
allocation frequency is observed at run time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "cfl reachability; container bloat; points-to
analysis",
}
@Article{Xu:2010:FLU,
author = "Guoqing Xu and Nick Mitchell and Matthew Arnold and
Atanas Rountev and Edith Schonberg and Gary Sevitsky",
title = "Finding low-utility data structures",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "174--186",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806617",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many opportunities for easy, big-win, program
optimizations are missed by compilers. This is
especially true in highly layered Java applications.
Often at the heart of these missed optimization
opportunities lie computations that, with great
expense, produce data values that have little impact on
the program's final output. Constructing a new date
formatter to format every date, or populating a large
set full of expensively constructed structures only to
check its size: these involve costs that are out of
line with the benefits gained. This disparity between
the formation costs and accrued benefits of data
structures is at the heart of much runtime
bloat.\par
We introduce a run-time analysis to discover these {\em
low-utility\/} data structures. The analysis employs
dynamic thin slicing, which naturally associates costs
with value flows rather than raw data flows. It
constructs a model of the incremental, hop-to-hop,
costs and benefits of each data structure. The analysis
then identifies suspicious structures based on
imbalances of its incremental costs and benefits. To
decrease the memory requirements of slicing, we
introduce {\em abstract dynamic thin slicing}, which
performs thin slicing over bounded abstract domains. We
have modified the IBM J9 commercial JVM to implement
this approach.\par
We demonstrate two client analyses: one that finds
objects that are expensive to construct but are not
necessary for the forward execution, and a second that
pinpoints ultimately-dead values. We have successfully
applied them to large-scale and long-running Java
applications. We show that these analyses are effective
at detecting operations that have unbalanced costs and
benefits.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "abstract dynamic thin slicing; cost benefit analysis;
memory bloat",
}
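The 'populate a large set only to check its size' pattern quoted above is easy to exhibit. In the hypothetical sketch below, the first method pays to construct and hash a descriptor per element even though only the count survives; the second keeps the benefit at a fraction of the cost. The names, and the assumption that descriptors are unique per id, are purely illustrative.

    import java.util.HashSet;
    import java.util.Set;

    class LowUtilityExample {
        // Low utility: expensively builds and stores one object per id,
        // but only the resulting count is ever used.
        static int countViaSet(int[] ids) {
            Set<String> built = new HashSet<>();
            for (int id : ids) {
                built.add(expensiveDescriptor(id)); // cost out of line with benefit
            }
            return built.size();
        }

        // Same benefit, none of the construction cost, given that
        // descriptors are unique per id as this example assumes.
        static int countDirectly(int[] ids) {
            return ids.length;
        }

        static String expensiveDescriptor(int id) {
            return String.format("descriptor-%08d", id); // stand-in for real work
        }
    }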
@Article{Mytkowicz:2010:EAJ,
author = "Todd Mytkowicz and Amer Diwan and Matthias Hauswirth
and Peter F. Sweeney",
title = "Evaluating the accuracy of {Java} profilers",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "187--197",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806618",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Performance analysts profile their programs to find
methods that are worth optimizing: the 'hot' methods.
This paper shows that four commonly-used Java profilers
({\em xprof, hprof, jprofile, and yourkit\/}) often
disagree on the identity of the hot methods. If two
profilers disagree, at least one must be incorrect.
Thus, there is a good chance that a profiler will
mislead a performance analyst into wasting time
optimizing a cold method with little or no performance
improvement.\par
This paper uses causality analysis to evaluate
profilers and to gain insight into the source of their
incorrectness. It shows that these profilers all
violate a fundamental requirement for sampling based
profilers: to be correct, a sampling-based profiler
must collect samples randomly.\par
We show that a proof-of-concept profiler, which
collects samples randomly, does not suffer from the
above problems. Specifically, we show, using a number
of case studies, that our profiler correctly identifies
methods that are important to optimize; in some cases
other profilers report that these methods are cold and
thus not worth optimizing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "bias; observer effect; profiling",
}
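The requirement that samples be collected randomly can be met with randomized inter-sample pauses. The sketch below is a minimal, hypothetical sampler, not the paper's proof-of-concept profiler: it sleeps for exponentially distributed intervals, so sample times are memoryless and do not line up with periodic program behavior, and it records the top frame of every thread.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Random;

    class RandomizedSampler implements Runnable {
        private final Map<String, Integer> hot = new HashMap<>();
        private final Random rng = new Random();
        private final double meanMillis;

        RandomizedSampler(double meanMillis) { this.meanMillis = meanMillis; }

        public void run() {
            try {
                while (!Thread.currentThread().isInterrupted()) {
                    // Exponentially distributed pause between samples.
                    long pause =
                        (long) (-meanMillis * Math.log(1 - rng.nextDouble()));
                    Thread.sleep(Math.max(1, pause));
                    for (StackTraceElement[] st
                            : Thread.getAllStackTraces().values()) {
                        if (st.length > 0) {
                            hot.merge(st[0].toString(), 1, Integer::sum);
                        }
                    }
                }
            } catch (InterruptedException ignored) { }
        }

        Map<String, Integer> counts() { return hot; }
    }

Starting it is just new Thread(new RandomizedSampler(10.0)).start(). Note that a JVM-level sampler like this one still observes threads only at safepoints, which is part of the very bias the paper diagnoses.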
@Article{Baek:2010:GFS,
author = "Woongki Baek and Trishul M. Chilimbi",
title = "{Green}: a framework for supporting energy-conscious
programming using controlled approximation",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "198--209",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806620",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy-efficient computing is important in several
systems ranging from embedded devices to large scale
data centers. Several application domains offer the
opportunity to tradeoff quality of service/solution
(QoS) for improvements in performance and reduction in
energy consumption. Programmers sometimes take
advantage of such opportunities, albeit in an ad-hoc
manner and often without providing any QoS
guarantees.\par
We propose a system called Green that provides a simple
and flexible framework that allows programmers to take
advantage of such approximation opportunities in a
systematic manner while providing statistical QoS
guarantees. Green enables programmers to approximate
expensive functions and loops and operates in two
phases. In the calibration phase, it builds a model of
the QoS loss produced by the approximation. This model
is used in the operational phase to make approximation
decisions based on the QoS constraints specified by the
programmer. The operational phase also includes an
adaptation function that occasionally monitors the
runtime behavior and changes the approximation
decisions and QoS model to provide strong statistical
QoS guarantees.\par
To evaluate the effectiveness of Green, we implemented
our system and language extensions using the Phoenix
compiler framework. Our experiments using benchmarks
from domains such as graphics, machine learning, signal
processing, and finance, and an in-production,
real-world web search engine, indicate that Green can
produce significant improvements in performance and
energy consumption with small and controlled QoS
degradation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "controlled approximation; energy-conscious
programming",
}
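Green's two phases can be caricatured in a few lines. The sketch below (illustrative names only, not the Phoenix-based system) approximates an expensive summation by truncating it: the calibration phase measures the QoS loss of each truncation level against the exact result, and the operational phase then uses the cheapest level whose measured loss stays within the programmer's bound.

    class GreenStyleApproximation {
        // Exact but expensive: sum n terms of a slowly converging series.
        static double exact(int n) {
            double s = 0;
            for (int k = 1; k <= n; k++) s += 1.0 / ((double) k * k);
            return s;
        }

        // Calibration: the smallest truncation level whose relative loss
        // against the exact answer stays within maxLoss.
        static int calibrate(int n, double maxLoss) {
            double full = exact(n);
            for (int level = n / 100; level <= n; level += n / 100) {
                double loss = Math.abs(full - exact(level)) / Math.abs(full);
                if (loss <= maxLoss) return level;
            }
            return n;
        }

        public static void main(String[] args) {
            int n = 1_000_000;
            int level = calibrate(n, 0.001);        // calibration phase
            System.out.println("truncate at " + level + " of " + n
                + " terms, value " + exact(level)); // operational phase
        }
    }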
@Article{Rajan:2010:GPM,
author = "Kaushik Rajan and Sriram Rajamani and Shashank
Yaduvanshi",
title = "{GUESSTIMATE}: a programming model for collaborative
distributed systems",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "210--220",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806621",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new programming model GUESSTIMATE for
developing collaborative distributed systems. The model
allows atomic, isolated operations that transform a
system from consistent state to consistent state, and
provides a shared transactional store for a collection
of such operations executed by various machines in a
distributed system. In addition to 'committed state'
which is identical in all machines in the distributed
system, GUESSTIMATE allows each machine to have a
replicated local copy of the state (called
'guesstimated state') so that operations on shared
state can be executed locally without any blocking,
while also guaranteeing that eventually all machines
agree on the sequences of operations executed. Thus,
each operation is executed multiple times, once at the
time of issue when it updates the guesstimated state of
the issuing machine, once when the operation is
committed (atomically) to the committed state of all
machines, and several times in between as the
guesstimated state converges toward the committed
state. While we expect the results of these executions
of the operation to be identical most of the time in
the class of applications we study, it is possible for
an operation to succeed the first time when it is
executed on the guesstimated state, and fail when it is
committed. GUESSTIMATE provides facilities that allow
the programmer to deal with this potential discrepancy.
This paper presents our programming model, its
operational semantics, its realization as an API in
C\#, and our experience building collaborative
distributed applications with this model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "collaborative applications; concurrency; distributed
systems; language extensions",
}
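The two-level state of GUESSTIMATE can be imitated with two copies of the state and an agreed commit order. The sketch below uses an invented, much simplified API (the real system is a C# library): an operation is applied immediately to the local guesstimated copy, re-executed when the committed order is applied, and a discrepancy is reported when an operation that succeeded locally fails at commit time.

    import java.util.List;
    import java.util.function.Predicate;

    class GuesstimateSketch {
        // Shared state: one seat that at most one machine may book.
        static class State { String seatOwner = null; }

        // An operation succeeds or fails depending on the state it sees.
        record Booking(String machine) implements Predicate<State> {
            public boolean test(State s) {
                if (s.seatOwner != null) return false; // seat already taken
                s.seatOwner = machine();
                return true;
            }
        }

        public static void main(String[] args) {
            State guesstimated = new State(); // this machine's local copy
            State committed = new State();    // the agreed global state

            Booking mine = new Booking("machine-1");
            boolean localResult = mine.test(guesstimated); // no blocking
            System.out.println("locally: " + localResult);

            // The committed order happens to put another machine first.
            List<Booking> committedOrder =
                List.of(new Booking("machine-2"), mine);
            for (Booking op : committedOrder) {
                boolean r = op.test(committed);
                if (op == mine && r != localResult) {
                    System.out.println("discrepancy at commit: " + r);
                }
            }
        }
    }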
@Article{Xi:2010:CFM,
author = "Qian Xi and David Walker",
title = "A context-free markup language for semi-structured
text",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "221--232",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806622",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "An {\em ad hoc data format\/} is any nonstandard,
semi-structured data format for which robust data
processing tools are not easily available. In this
paper, we present ANNE, a new kind of markup language
designed to help users generate documentation and data
processing tools for ad hoc text data. More
specifically, given a new ad hoc data source, an ANNE
programmer edits the document to add a number of simple
annotations, which serve to specify its syntactic
structure. Annotations include elements that specify
constants, optional data, alternatives, enumerations,
sequences, tabular data, and recursive patterns. The
ANNE system uses a combination of user annotations and
the raw data itself to extract a context-free grammar
from the document. This context-free grammar can then
be used to parse the data and transform it into an XML
parse tree, which may be viewed through a browser for
analysis or debugging purposes. In addition, the ANNE
system generates a PADS/ML description, which may be
saved as lasting documentation of the data format or
compiled into a host of useful data processing
tools.\par
In addition to designing and implementing ANNE, we have
devised a semantic theory for the core elements of the
language. This semantic theory describes the editing
process, which translates a raw, unannotated text
document into an annotated document, and the grammar
extraction process, which generates a context-free
grammar from an annotated document. We also present an
alternative characterization of system behavior by
drawing upon ideas from the field of relevance logic.
This secondary characterization, which we call {\em
relevance analysis}, specifies a direct relationship
between unannotated documents and the context-free
grammars that our system can generate from them.
Relevance analysis allows us to prove important
theorems concerning the expressiveness and utility of
our system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "ad hoc data; ANNE; domain-specific languages; PADS;
tool generation",
}
@Article{Loitsch:2010:PFP,
author = "Florian Loitsch",
title = "Printing floating-point numbers quickly and accurately
with integers",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "233--243",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806623",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present algorithms for accurately converting
floating-point numbers to decimal representation. They
are fast (up to 4 times faster than commonly used
algorithms that use high-precision integers) and
correct: any printed number will evaluate to the same
number, when read again.\par
Our algorithms are fast, because they require only
fixed-size integer arithmetic. The sole requirement for
the integer type is that it has at least two more bits
than the significand of the floating-point number.
Hence, for IEEE 754 double-precision numbers (having a
53-bit significand) an integer type with 55 bits is
sufficient. Moreover we show how to exploit additional
bits to improve the generated output.\par
We present three algorithms with different properties:
the first algorithm is the most basic one, and does not
take advantage of any extra bits. It simply shows how
to perform the binary-to-decimal transformation with
the minimal number of bits. Our second algorithm
improves on the first one by using the additional bits
to produce a shorter (often the shortest)
result.\par
Finally we propose a third version that can be used
when the shortest output is a requirement. The last
algorithm either produces optimal decimal
representations (with respect to shortness and
rounding) or rejects its input. For IEEE 754
double-precision numbers and 64-bit integers roughly
99.4\% of all numbers can be processed efficiently. The
remaining 0.6\% are rejected and need to be printed by
a slower complete algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dtoa; floating-point printing",
}
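The correctness criterion above, that every printed number reads back as the same number, can be tested independently of any particular printing algorithm. The sketch below checks the round-trip property on random bit patterns using the JDK's own converters; it validates the property itself rather than the paper's algorithms.

    import java.util.Random;

    class RoundTripCheck {
        public static void main(String[] args) {
            Random rng = new Random(42);
            for (int i = 0; i < 1_000_000; i++) {
                double x = Double.longBitsToDouble(rng.nextLong());
                if (Double.isNaN(x)) continue;  // NaN never equals itself
                String printed = Double.toString(x);
                double back = Double.parseDouble(printed);
                // Compare bit patterns so that -0.0 and 0.0 stay distinct.
                if (Double.doubleToRawLongBits(back)
                        != Double.doubleToRawLongBits(x)) {
                    throw new AssertionError(x + " printed as " + printed);
                }
            }
            System.out.println("all sampled doubles survived the round trip");
        }
    }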
@Article{Flanagan:2010:AMD,
author = "Cormac Flanagan and Stephen N. Freund",
title = "Adversarial memory for detecting destructive races",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "244--254",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806625",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multithreaded programs are notoriously prone to race
conditions, a problem exacerbated by the widespread
adoption of multi-core processors with complex memory
models and cache coherence protocols. Much prior work
has focused on static and dynamic analyses for race
detection, but these algorithms typically are unable to
distinguish destructive races that cause erroneous
behavior from benign races that do not. Performing this
classification manually is difficult, time consuming,
and error prone.\par
This paper presents a new dynamic analysis technique
that uses {\em adversarial memory\/} to classify race
conditions as destructive or benign on systems with
relaxed memory models. Unlike a typical language
implementation, which may only infrequently exhibit
non-sequentially consistent behavior, our adversarial
memory implementation exploits the full freedom of the
memory model to return older, unexpected, or stale
values for memory reads whenever possible, in an
attempt to crash the target program (that is, to force
the program to behave erroneously). A crashing
execution provides concrete evidence of a destructive
bug, and this bug can be strongly correlated with a
specific race condition in the target
program.\par
Experimental results with our Jumble prototype for Java
demonstrate that adversarial memory is highly effective
at identifying destructive race conditions, and in
distinguishing them from race conditions that are real
but benign. Adversarial memory can also reveal
destructive races that would not be detected by
traditional testing (even after thousands of runs) or
by model checkers that assume sequential consistency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "concurrency; dynamic analysis; race conditions;
relaxed memory models",
}
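The adversarial-memory idea can be mimicked for a single shared location: record the history of writes and let a racy read return any stale value a relaxed memory model might legally supply. The toy below is only illustrative (Jumble operates inside the JVM, not at this level), but it is enough to shake out code that silently assumes it always observes the latest unsynchronized write.

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Random;

    // Toy stand-in for one memory location with adversarial reads.
    class AdversarialCell {
        private final List<Integer> history = new ArrayList<>();
        private final Random rng = new Random();

        AdversarialCell(int initial) { history.add(initial); }

        synchronized void write(int v) { history.add(v); }

        // A racy read may legally observe an older value under a relaxed
        // model, so the adversary returns any write from the history.
        synchronized int adversarialRead() {
            return history.get(rng.nextInt(history.size()));
        }
    }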
@Article{Bond:2010:PPD,
author = "Michael D. Bond and Katherine E. Coons and Kathryn S.
McKinley",
title = "{PACER}: proportional detection of data races",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "255--268",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806626",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data races indicate serious concurrency bugs such as
order, atomicity, and sequential consistency
violations. Races are difficult to find and fix, often
manifesting only after deployment. The frequency and
unpredictability of these bugs will only increase as
software adds parallelism to exploit multicore
hardware. Unfortunately, sound and precise race
detectors slow programs by factors of eight or more and
do not scale to large numbers of threads.\par
This paper presents a precise, low-overhead {\em
sampling-based\/} data race detector called PACER.
PACER makes a {\em proportionality\/} guarantee: it
detects any race at a rate equal to the sampling rate,
by finding races whose first access occurs during a
global sampling period. During sampling, PACER tracks
all accesses using the dynamically sound and precise
FastTrack algorithm. In nonsampling periods, PACER
discards sampled access information that cannot be part
of a reported race, {\em and\/} PACER simplifies
tracking of the happens-before relationship, yielding
near-constant, instead of linear, overheads.
Experimental results confirm our theoretical
guarantees. PACER reports races in proportion to the
sampling rate. Its time and space overheads scale with
the sampling rate, and sampling rates of 1-3\% yield
overheads low enough to consider in production
software. The resulting system provides a 'get what you
pay for' approach that is suitable for identifying
real, hard-to-reproduce races in deployed systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "bugs; concurrency; data races; sampling",
}
@Article{Nakaike:2010:LER,
author = "Takuya Nakaike and Maged M. Michael",
title = "Lock elision for read-only critical sections in
{Java}",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "269--278",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806627",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is not uncommon in parallel workloads to encounter
shared data structures with read-mostly access
patterns, where operations that update data are
infrequent and most operations are read-only.
Typically, data consistency is guaranteed using mutual
exclusion or read-write locks. The cost of atomic
updates of lock variables results in high overheads and
high cache coherence traffic under active sharing, thus
slowing down single thread performance and limiting
scalability.\par
In this paper, we present {\em SOLERO (Software
Optimistic Lock Elision for Read-Only critical
sections)}, a new lock implementation for optimizing
read-only critical sections in Java, based on
sequential locks. SOLERO is compatible with the
conventional lock implementation of Java. However,
unlike the conventional implementation, only critical
sections that may write data or have side effects need
to update lock variables, while read-only critical
sections need only read lock variables without writing
them. Each writing critical section changes the lock
value to a new value. Hence, a read-only critical
section is guaranteed to be consistent if the lock is
free and its value does not change from the beginning
to the end of the read-only critical section.\par
Using Java workloads including SPECjbb2005 and the
HashMap and TreeMap Java classes, we evaluate the
performance impact of applying SOLERO to read-mostly
locks. Our experimental results show performance
improvements across the board, often substantial, in
both single thread speed and scalability over the
conventional lock implementation (mutual exclusion) and
read-write locks. SOLERO improves the performance of
SPECjbb2005 by 3-5\% on single and multiple threads.
The results using the HashMap and TreeMap benchmarks
show that SOLERO outperforms the conventional lock
implementation and read-write locks by substantial
multiples when running multiple threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "java; just-in-time compiler; lock; lock elision;
monitor; optimization; synchronization",
}
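The discipline described above is a sequence-lock pattern: a writer bumps a version counter to an odd value while updating, and a read-only critical section is valid only if it observed an even, unchanged version before and after its reads. The sketch below is a generic seqlock in that spirit, not SOLERO itself; the protected fields are declared volatile to keep the sketch correct under the Java memory model, which over-constrains what a production implementation would do.

    class SeqLock {
        private volatile long version = 0;  // even: free, odd: writer active
        private volatile int a, b;          // protected data

        synchronized void write(int x, int y) {
            version++;                      // odd: readers must retry
            a = x;
            b = y;
            version++;                      // even again: update published
        }

        // Read-only critical section: never writes the lock word.
        int sum() {
            while (true) {
                long v = version;
                if ((v & 1L) != 0) continue;    // writer in progress
                int x = a, y = b;
                if (version == v) return x + y; // unchanged: consistent
            }
        }
    }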
@Article{Chaudhuri:2010:SI,
author = "Swarat Chaudhuri and Armando Solar-Lezama",
title = "Smooth interpretation",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "279--291",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806629",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present {\em smooth interpretation}, a method to
systematically approximate numerical imperative
programs by smooth mathematical functions. This
approximation facilitates the use of numerical search
techniques like gradient descent for program analysis
and synthesis. The method extends to programs the
notion of {\em Gaussian smoothing}, a popular
signal-processing technique that filters out noise and
discontinuities from a signal by taking its convolution
with a Gaussian function.\par
In our setting, Gaussian smoothing executes a program
according to a probabilistic semantics; the execution
of program {\em P\/} on an input {\em x\/} after
Gaussian smoothing can be summarized as follows: (1)
Apply a Gaussian perturbation to {\em x\/} -- the
perturbed input is a random variable following a normal
distribution with mean {\em x}. (2) Compute and return
the {\em expected output\/} of {\em P\/} on this
perturbed input. Computing the expectation explicitly
would require the execution of {\em P\/} on all
possible inputs, but smooth interpretation bypasses
this requirement by using a form of symbolic execution
to approximate the effect of Gaussian smoothing on {\em
P}. The result is an efficient but approximate
implementation of Gaussian smoothing of
programs.\par
Smooth interpretation has the effect of attenuating
features of a program that impede numerical searches of
its input space -- for example, discontinuities
resulting from conditional branches are replaced by
continuous transitions. We apply smooth interpretation
to the problem of synthesizing values of numerical
control parameters in embedded control applications.
This problem is naturally formulated as one of
numerical optimization: the goal is to find parameter
values that minimize the error between the resulting
program and a programmer-provided behavioral
specification. Solving this problem by directly
applying numerical optimization techniques is often
impractical due to the discontinuities in the error
function. By eliminating these discontinuities, smooth
interpretation makes it possible to search the
parameter space efficiently by means of simple gradient
descent. Our experiments demonstrate the value of this
strategy in synthesizing parameters for several
challenging programs, including models of an automated
gear shift and a PID controller.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "continuity; parameter synthesis; program smoothing",
}
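Gaussian smoothing of a program's output can be estimated numerically, which makes the idea concrete even though the paper computes it symbolically. The sketch below estimates the expected output of a discontinuous step 'program' under Gaussian input perturbation by Monte Carlo, then runs plain gradient descent on the smoothed function; every name here is illustrative.

    import java.util.Random;
    import java.util.function.DoubleUnaryOperator;

    class SmoothingSketch {
        // Monte Carlo estimate of smooth(x) = E[p(x + eps)],
        // with eps drawn from N(0, sigma^2).
        static double smooth(DoubleUnaryOperator p, double x, double sigma,
                             int samples, Random rng) {
            double sum = 0;
            for (int i = 0; i < samples; i++) {
                sum += p.applyAsDouble(x + sigma * rng.nextGaussian());
            }
            return sum / samples;
        }

        public static void main(String[] args) {
            Random rng = new Random(7);
            // Discontinuous error: 0 iff the branch x >= 3 is taken.
            // Raw gradient descent sees a flat function and a cliff.
            DoubleUnaryOperator error = x -> (x >= 3.0) ? 0.0 : 1.0;
            double x = 0.0, sigma = 1.0, step = 2.0, h = 0.5;
            for (int iter = 0; iter < 200; iter++) {
                double grad = (smooth(error, x + h, sigma, 20_000, rng)
                             - smooth(error, x - h, sigma, 20_000, rng))
                             / (2 * h);
                x -= step * grad;   // descend on the smoothed error
            }
            System.out.println("found x = " + x + ", raw error = "
                + error.applyAsDouble(x));
        }
    }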
@Article{Gulwani:2010:RBP,
author = "Sumit Gulwani and Florian Zuleger",
title = "The reachability-bound problem",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "292--304",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806630",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We define the {\em reachability-bound problem\/} to be
the problem of finding a symbolic worst-case bound on
the number of times a given control location inside a
procedure is visited in terms of the inputs to that
procedure. This has applications in bounding resources
consumed by a program such as time, memory,
network-traffic, power, as well as estimating
quantitative properties (as opposed to boolean
properties) of data in programs, such as information
leakage or uncertainty propagation. Our approach to
solving the reachability-bound problem brings together
two different techniques for reasoning about loops in
an effective manner. One of these techniques is an
abstract-interpretation based iterative technique for
computing precise disjunctive invariants (to summarize
nested loops). The other technique is a non-iterative
proof-rules based technique (for loop bound
computation) that takes over the role of doing
inductive reasoning, while deriving its power from the
use of SMT solvers to reason about abstract loop-free
fragments.\par
Our solution to the reachability-bound problem allows
us to compute precise symbolic complexity bounds for
several loops in {.NET} base-class libraries for which
earlier techniques fail. We also illustrate the
precision of our algorithm for disjunctive invariant
computation (which has a more general applicability
beyond the reachability-bound problem) on a set of
benchmark examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "disjunctive invariants; pattern matching; ranking
functions; resource bound analysis; transitive
closure",
}
@Article{Might:2010:REC,
author = "Matthew Might and Yannis Smaragdakis and David {Van
Horn}",
title = "Resolving and exploiting the $k$-{CFA} paradox:
illuminating functional vs. object-oriented program
analysis",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "305--315",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806631",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Low-level program analysis is a fundamental problem,
taking the shape of 'flow analysis' in functional
languages and 'points-to' analysis in imperative and
object-oriented languages. Despite the similarities,
the vocabulary and results in the two communities
remain largely distinct, with limited
cross-understanding. One of the few links is Shivers's
$k$-CFA work, which has advanced the concept of
'context-sensitive analysis' and is widely known in
both communities.\par
Recent results indicate that the relationship between
the functional and object-oriented incarnations of
$k$-CFA is not as well understood as thought. Van Horn
and Mairson proved $k$-CFA for $k \geq 1$ to be
EXPTIME-complete; hence, no polynomial-time algorithm
can exist. Yet, there are several polynomial-time
formulations of context-sensitive points-to analyses in
object-oriented languages. Thus, it seems that
functional $k$-CFA may actually be a profoundly
different analysis from object-oriented $k$-CFA. We
resolve this paradox by showing that the exact same
specification of $k$-CFA is polynomial-time for
object-oriented languages yet exponential-time for
functional ones: objects and closures are subtly
different, in a way that interacts crucially with
context-sensitivity and complexity. This illumination
leads to an immediate payoff: by projecting the
object-oriented treatment of objects onto closures, we
derive a polynomial-time hierarchy of context-sensitive
CFAs for functional programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "control-flow analysis; functional; k-cfa; m-cfa;
object-oriented; pointer analysis; static analysis",
}
@Article{Kuncak:2010:CFS,
author = "Viktor Kuncak and Mika{\"e}l Mayer and Ruzica Piskac
and Philippe Suter",
title = "Complete functional synthesis",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "316--329",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806632",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Synthesis of program fragments from specifications can
make programs easier to write and easier to reason
about. To integrate synthesis into programming
languages, synthesis algorithms should behave in a
predictable way - they should succeed for a
well-defined class of specifications. They should also
support unbounded data types such as numbers and data
structures. We propose to generalize decision
procedures into predictable and complete synthesis
procedures. Such procedures are guaranteed to find code
that satisfies the specification if such code exists.
Moreover, we identify conditions under which synthesis
will statically decide whether the solution is
guaranteed to exist, and whether it is unique. We
demonstrate our approach by starting from decision
procedures for linear arithmetic and data structures
and transforming them into synthesis procedures. We
establish results on the size and the efficiency of the
synthesized code. We show that such procedures are
useful as a language extension with implicit value
definitions, and we show how to extend a compiler to
support such definitions. Our constructs provide the
benefits of synthesis to programmers, without requiring
them to learn new concepts or give up a deterministic
execution model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "bapa; decision procedure; Presburger arithmetic;
synthesis procedure",
}
@Article{Burckhardt:2010:LCA,
author = "Sebastian Burckhardt and Chris Dern and Madanlal
Musuvathi and Roy Tan",
title = "{Line-Up}: a complete and automatic linearizability
checker",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "330--340",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806634",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modular development of concurrent applications
requires thread-safe components that behave correctly
when called concurrently by multiple client threads.
This paper focuses on linearizability, a specific
formalization of thread safety, where all operations of
a concurrent component appear to take effect
instantaneously at some point between their call and
return. The key insight of this paper is that if a
component is intended to be deterministic, then it is
possible to build an automatic linearizability checker
by systematically enumerating the sequential behaviors
of the component and then checking whether each of its
concurrent behaviors is equivalent to some sequential
behavior.\par
We develop this insight into a tool called Line-Up, the
first complete and automatic checker for {\em
deterministic linearizability}. It is complete, because
any reported violation proves that the implementation
is not linearizable with respect to {\em any\/}
sequential deterministic specification. It is
automatic, requiring no manual abstraction, no manual
specification of semantics or commit points, no
manually written test suites, no access to source
code.\par
We evaluate Line-Up by analyzing 13 classes with a
total of 90 methods in two versions of the {.NET}
Framework 4.0. The violations of deterministic
linearizability reported by Line-Up exposed seven
errors in the implementation that were fixed by the
development team.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "atomicity; linearizability; thread safety",
}
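
To make the key insight of the Line-Up abstract concrete: for a
deterministic component, a concurrent outcome is acceptable exactly
when it matches the outcome of some sequential ordering of the same
operations. A toy brute-force sketch in Python (illustrative only;
the actual tool checks .NET binaries and enumerates thread schedules
systematically, which this sketch does not attempt):

  from itertools import permutations

  def sequential_behaviors(make_component, ops):
      # Run every sequential ordering of ops on a fresh component
      # and collect the tuple of results each ordering produces.
      behaviors = set()
      for order in permutations(range(len(ops))):
          comp = make_component()
          results = [None] * len(ops)
          for i in order:
              results[i] = ops[i](comp)
          behaviors.add(tuple(results))
      return behaviors

  def is_linearizable(observed, make_component, ops):
      return observed in sequential_behaviors(make_component, ops)

  # Toy component: a counter whose increment returns the new value.
  make_counter = lambda: {"n": 0}
  def inc(c):
      c["n"] += 1
      return c["n"]

  # Two concurrent inc() calls may legitimately observe (1, 2) or
  # (2, 1); observing (1, 1) would reveal a lost update, i.e. a
  # violation of (deterministic) linearizability.
  assert is_linearizable((1, 2), make_counter, [inc, inc])
  assert not is_linearizable((1, 1), make_counter, [inc, inc])
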
@Article{Torlak:2010:MCA,
author = "Emina Torlak and Mandana Vaziri and Julian Dolby",
title = "{MemSAT}: checking axiomatic specifications of memory
models",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "341--350",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806635",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Memory models are hard to reason about due to their
complexity, which stems from the need to strike a
balance between ease-of-programming and allowing
compiler and hardware optimizations. In this paper, we
present an automated tool, MemSAT, that helps in
debugging and reasoning about memory models. Given an
axiomatic specification of a memory model and a
multi-threaded test program containing assertions,
MemSAT outputs a trace of the program in which both the
assertions and the memory model axioms are satisfied,
if one can be found. The tool is fully automatic and is
based on a SAT solver. If it cannot find a trace, it
outputs a minimal subset of the memory model and
program constraints that are unsatisfiable. We used
MemSAT to check several existing memory models against
their published test cases, including the current Java
Memory Model by Manson et al. and a revised version of
it by Sevcik and Aspinall. We found subtle
discrepancies between what was expected and the actual
results of test programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "axiomatic specifications; bounded model checking;
memory models; sat",
}
@Article{Marino:2010:DSE,
author = "Daniel Marino and Abhayendra Singh and Todd Millstein
and Madanlal Musuvathi and Satish Narayanasamy",
title = "{DRFX}: a simple and efficient memory model for
concurrent programming languages",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "351--362",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806636",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The most intuitive memory model for shared-memory
multithreaded programming is {\em sequential
consistency\/} (SC), but it disallows the use of many
compiler and hardware optimizations, thereby impacting
performance. Data-race-free (DRF) models, such as the
proposed C++0x memory model, guarantee SC execution for
data-race-free programs. But these models provide no
guarantee at all for racy programs, compromising the
safety and debuggability of such programs. To address
the safety issue, the Java memory model, which is also
based on the DRF model, provides a weak semantics for
racy executions. However, this semantics is subtle and
complex, making it difficult for programmers to reason
about their programs and for compiler writers to ensure
the correctness of compiler optimizations.\par
We present the DRFx memory model, which is simple for
programmers to understand and use while still
supporting many common optimizations. We introduce a
{\em memory model (MM) exception\/} which can be
signaled to halt execution. If a program executes
without throwing this exception, then DRFx guarantees
that the execution is SC. If a program throws an MM
exception during an execution, then DRFx guarantees
that the program has a data race. We observe that SC
violations can be detected in hardware through a
lightweight form of conflict detection. Furthermore,
our model safely allows aggressive compiler and
hardware optimizations within compiler-designated
program regions. We formalize our memory model, prove
several properties about this model, describe a
compiler and hardware design suitable for DRFx, and
evaluate the performance overhead due to our compiler
and hardware requirements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "data races; memory model exception; memory models;
sequential consistency; soft fences",
}
@Article{Chambers:2010:FEE,
author = "Craig Chambers and Ashish Raniwala and Frances Perry
and Stephen Adams and Robert R. Henry and Robert
Bradshaw and Nathan Weizenbaum",
title = "{FlumeJava}: easy, efficient data-parallel pipelines",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "363--375",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806638",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "MapReduce and similar systems significantly ease the
task of writing data-parallel code. However, many
real-world computations require a pipeline of
MapReduces, and programming and managing such pipelines
can be difficult. We present FlumeJava, a Java library
that makes it easy to develop, test, and run efficient
data-parallel pipelines. At the core of the FlumeJava
library are a couple of classes that represent
immutable parallel collections, each supporting a
modest number of operations for processing them in
parallel. Parallel collections and their operations
present a simple, high-level, uniform abstraction over
different data representations and execution
strategies. To enable parallel operations to run
efficiently, FlumeJava defers their evaluation, instead
internally constructing an execution plan dataflow
graph. When the final results of the parallel
operations are eventually needed, FlumeJava first
optimizes the execution plan, and then executes the
optimized operations on appropriate underlying
primitives (e.g., MapReduces). The combination of
high-level abstractions for parallel data and
computation, deferred evaluation and optimization, and
efficient parallel primitives yields an easy-to-use
system that approaches the efficiency of hand-optimized
pipelines. FlumeJava is in active use by hundreds of
pipeline developers within Google.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "data-parallel programming; java; mapreduce",
}
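
The deferred-evaluation core described in the FlumeJava abstract can
be illustrated in a few lines: operations on a parallel collection
record a plan rather than executing, and the plan is optimized (here,
a chain of maps is fused into a single pass) before anything runs. A
hedged Python sketch with invented names, not FlumeJava's actual Java
API:

  class PCollection:
      # Deferred collection: operations only extend an execution plan.
      def __init__(self, data, plan=()):
          self.data, self.plan = data, plan

      def parallel_do(self, fn):  # loosely analogous to parallelDo()
          return PCollection(self.data, self.plan + (fn,))

      def run(self):
          # "Optimizer": fuse the map chain so the data is traversed
          # once; a real system would hand this to MapReduce-style
          # primitives instead of a list comprehension.
          def fused(x):
              for fn in self.plan:
                  x = fn(x)
              return x
          return [fused(x) for x in self.data]

  pc = PCollection([1, 2, 3]).parallel_do(lambda x: x + 1) \
                             .parallel_do(lambda x: x * 2)
  assert pc.run() == [4, 6, 8]  # nothing executes until run()
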
@Article{Pan:2010:CPS,
author = "Heidi Pan and Benjamin Hindman and Krste
Asanovi{\'c}",
title = "Composing parallel software efficiently with {Lithe}",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "376--387",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806639",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Applications composed of multiple parallel libraries
perform poorly when those libraries interfere with one
another by obliviously using the same physical cores,
leading to destructive resource oversubscription. This
paper presents the design and implementation of {\em
Lithe}, a low-level substrate that provides the basic
primitives and a standard interface for composing
parallel codes efficiently. Lithe can be inserted
underneath the runtimes of legacy parallel libraries to
provide {\em bolt-on\/} composability without needing
to change existing application code. Lithe can also
serve as the foundation for building new parallel
abstractions and libraries that automatically
interoperate with one another.\par
In this paper, we show that versions of Threading Building
Blocks (TBB) and OpenMP perform competitively with
their original implementations when ported to Lithe.
Furthermore, for two applications composed of multiple
parallel libraries, we show that leveraging our
substrate outperforms their original, even expertly
tuned, implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "composability; cooperative scheduling; hierarchical
scheduling; oversubscription; parallelism; resource
management; user-level scheduling",
}
@Article{Zhou:2010:BDC,
author = "Jin Zhou and Brian Demsky",
title = "{Bamboo}: a data-centric, object-oriented approach to
many-core software",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "388--399",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806640",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Traditional data-oriented programming languages such
as dataflow languages and stream languages provide a
natural abstraction for parallel programming. In these
languages, a developer focuses on the flow of data
through the computation and these systems free the
developer from the complexities of low-level,
thread-oriented concurrency primitives. This
simplification comes at a cost --- traditional
data-oriented approaches restrict the mutation of state
and, in practice, the types of data structures a
program can effectively use. Bamboo borrows from work
in typestate and software transactions to relax the
traditional restrictions of data-oriented programming
models to support mutation of arbitrary data
structures.\par
We have implemented a compiler for Bamboo which
generates code for the TILEPro64 many-core processor.
We have evaluated this implementation on six
benchmarks: Tracking, a feature tracking algorithm from
computer vision; KMeans, a K-means clustering
algorithm; MonteCarlo, a Monte Carlo simulation;
FilterBank, a multi-channel filter bank; Fractal, a
Mandelbrot set computation; and Series, a Fourier
series computation. We found that our compiler
generated implementations that obtained speedups
ranging from 26.2x to 61.6x when executed on 62
cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "data-centric languages; many-core programming",
}
@Article{Westbrook:2010:MJM,
author = "Edwin Westbrook and Mathias Ricken and Jun Inoue and
Yilong Yao and Tamer Abdelatif and Walid Taha",
title = "{Mint}: {Java} multi-stage programming using weak
separability",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "400--411",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806642",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multi-stage programming (MSP) provides a disciplined
approach to run-time code generation. In the purely
functional setting, it has been shown how MSP can be
used to reduce the overhead of abstractions, allowing
clean, maintainable code without paying performance
penalties. Unfortunately, MSP is difficult to combine
with imperative features, which are prevalent in
mainstream languages. The central difficulty is scope
extrusion, wherein free variables can inadvertently be
moved outside the scopes of their binders. This paper
proposes a new approach to combining MSP with
imperative features that occupies a 'sweet spot' in the
design space in terms of how well useful MSP
applications can be expressed and how easy it is for
programmers to understand. The key insight is that
escapes (or 'anti-quotes') must be weakly separable
from the rest of the code, i.e. the computational
effects occurring inside an escape that are visible
outside the escape are guaranteed not to contain code.
To demonstrate the feasibility of this approach, we
formalize a type system based on Lightweight Java which
we prove sound, and we also provide an implementation,
called Mint, to validate both the expressivity of the
type system and the effect of staging on the
performance of Java programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "java; multi-staged languages; multi-stage programming;
type systems",
}
@Article{Chen:2010:TPC,
author = "Juan Chen and Ravi Chugh and Nikhil Swamy",
title = "Type-preserving compilation of end-to-end verification
of security enforcement",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "412--423",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806643",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A number of programming languages use rich type
systems to verify security properties of code. Some of
these languages are meant for source programming, but
programs written in these languages are compiled
without explicit security proofs, limiting their
utility in settings where proofs are necessary, e.g.,
proof-carrying authorization. Other languages do
include explicit proofs, but these are generally lambda
calculi not intended for source programming, which must
be further compiled to an executable form. A language
suitable for source programming backed by a compiler
that enables end-to-end verification is missing.\par
In this paper, we present a type-preserving compiler
that translates programs written in FINE, a
source-level functional language with dependent
refinements and affine types, to DCIL, a new extension
of the {.NET} Common Intermediate Language. FINE is type
checked using an external SMT solver to reduce the
proof burden on source programmers. We extract explicit
LCF-style proof terms from the solver and carry these
proof terms in the compilation to DCIL, thereby
removing the solver from the trusted computing base.
Explicit proofs enable DCIL to be used in a number of
important scenarios, including the verification of
mobile code, proof-carrying authorization, and
evidence-based auditing. We report on our experience
using FINE to build reference monitors for several
applications, ranging from a plugin-based email client
to a conference management server.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "authorization; bytecode languages; compilers;
dependent types; functional programming; information
flow; mobile code security; security type systems",
}
@Article{Tate:2010:IOO,
author = "Ross Tate and Juan Chen and Chris Hawblitzel",
title = "Inferable object-oriented typed assembly language",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "424--435",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806644",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A certifying compiler preserves type information
through compilation to assembly language programs,
producing typed assembly language (TAL) programs that
can be verified for safety independently so that the
compiler does not need to be trusted. There are two
challenges for adopting certifying compilation in
practice. First, requiring every compiler
transformation and optimization to preserve types is a
large burden on compilers, especially when adopting
certifying compilation into existing optimizing
non-certifying compilers. Second, type annotations
significantly increase the size of assembly language
programs.\par
This paper proposes an alternative to traditional
certifying compilers. It presents iTalX, the first
inferable TAL type system that supports existential
types, arrays, interfaces, and stacks. We have proved
our inference algorithm is complete, meaning that if an
assembly language program is typeable with iTalX then
our algorithm will infer an iTalX typing for that
program. Furthermore, our algorithm is guaranteed to
terminate even if the assembly language program is
untypeable. We demonstrate that it is practical to
infer such an expressive TAL by showing a prototype
implementation of type inference for code compiled by
Bartok, an optimizing C\# compiler. Our prototype
implementation infers complete type annotations for
98\% of functions in a suite of realistic C\#
benchmarks. The type-inference time is about 8\% of the
compilation time. We needed to change only 2.5\% of the
compiler code, mostly adding new code for defining
types and for writing types to object files. Most
transformations are untouched. Type-annotation size is
only 17\% of the size of pure code and data, reducing
type annotations in our previous certifying compiler
[4] by 60\%. The compiler needs to preserve only
essential type information such as method signatures,
object-layout information, and types for static data
and external labels. Even non-certifying compilers have
most of this information available.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "certifying compiler; existential quantification;
object-oriented compiler; Typed Assembly Language
(TAL); type inference",
}
@Article{Khoo:2010:MTC,
author = "Yit Phang Khoo and Bor-Yuh Evan Chang and Jeffrey S.
Foster",
title = "Mixing type checking and symbolic execution",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "436--447",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1809028.1806645",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Static analysis designers must carefully balance
precision and efficiency. In our experience, many
static analysis tools are built around an elegant, core
algorithm, but that algorithm is then extensively
tweaked to add just enough precision for the coding
idioms seen in practice, without sacrificing too much
efficiency. There are several downsides to adding
precision in this way: the tool's implementation
becomes much more complicated; it can be hard for an
end-user to interpret the tool's results; and as
software systems vary tremendously in their coding
styles, it may require significant algorithmic
engineering to enhance a tool to perform well in a
particular software domain.\par
In this paper, we present Mix, a novel system that
mixes type checking and symbolic execution. The key
aspect of our approach is that these analyses are
applied independently on disjoint parts of the program,
in an off-the-shelf manner. At the boundaries between
nested type checked and symbolically executed code
regions, we use special mix rules to communicate
information between the off-the-shelf systems. The
resulting mixture is a provably sound analysis that is
more precise than type checking alone and more
efficient than exclusive symbolic execution. In
addition, we also describe a prototype implementation,
Mixy, for C. Mixy checks for potential null
dereferences by mixing a null/non-null type qualifier
inference system with a symbolic executor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "false alarms; mix; mixed off-the-shelf analysis; mix
rules; precision; symbolic execution; type checking",
}
@Article{Chen:2010:EIO,
author = "Yang Chen and Yuanjie Huang and Lieven Eeckhout and
Grigori Fursin and Liang Peng and Olivier Temam and
Chengyong Wu",
title = "Evaluating iterative optimization across 1000
datasets",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "448--459",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806647",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "While iterative optimization has become a popular
compiler optimization approach, it is based on a
premise which has never been truly evaluated: that it
is possible to learn the best compiler optimizations
across data sets. Up to now, most iterative
optimization studies find the best optimizations
through repeated runs on the same data set. Only a
handful of studies have attempted to exercise iterative
optimization on a few tens of data sets.\par
In this paper, we truly put iterative compilation to
the test for the first time by evaluating its
effectiveness across a large number of data sets. We
therefore compose KDataSets, a data set suite with 1000
data sets for 32 programs, which we release to the
public. We characterize the diversity of KDataSets, and
subsequently use it to evaluate iterative optimization.
We demonstrate that it is possible to derive a robust
iterative optimization strategy across data sets: for
all 32 programs, we find that there exists at least one
combination of compiler optimizations that achieves
86\% or more of the best possible speedup across {\em
all\/} data sets using Intel's ICC (83\% for GNU's
GCC). This optimal combination is program-specific and
yields speedups up to 1.71 on ICC and 2.23 on GCC over
the highest optimization level (-fast and -O3,
respectively). This finding makes the task of
optimizing programs across data sets much easier than
previously anticipated, and it paves the way for the
practical and reliable usage of iterative optimization.
Finally, we derive pre-shipping and post-shipping
optimization strategies for software vendors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "benchmarking; compiler optimization; iterative
optimization",
}
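
The selection criterion in the abstract above (one combination per
program achieving at least 86 percent of the best possible speedup
across all data sets) amounts to a robust argmax over a
combination-by-data-set speedup matrix. A hedged sketch with invented
numbers and flag names, purely to fix the arithmetic:

  # speedup of each optimization combination on each data set
  speedups = {
      "-O3":                [1.00, 1.00, 1.00],
      "-O3 -funroll-loops": [1.60, 1.10, 1.40],
      "-O3 -flto":          [1.30, 1.35, 1.25],
  }
  best = [max(col) for col in zip(*speedups.values())]

  def worst_case_share(combo):
      # smallest fraction of the best achievable speedup, over data sets
      return min(s / b for s, b in zip(speedups[combo], best))

  robust = max(speedups, key=worst_case_share)
  assert robust == "-O3 -funroll-loops"
  assert round(worst_case_share(robust), 2) == 0.81
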
@Article{Kamruzzaman:2010:SDS,
author = "Md Kamruzzaman and Steven Swanson and Dean M.
Tullsen",
title = "Software data spreading: leveraging distributed caches
to improve single thread performance",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "460--470",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806648",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Single thread performance remains an important
consideration even for multicore, multiprocessor
systems. As a result, techniques for improving single
thread performance using multiple cores have received
considerable attention. This work describes a
technique, {\em software data spreading}, that
leverages the cache capacity of extra cores and extra
sockets rather than their computational resources.
Software data spreading is a software-only technique
that uses compiler-directed thread migration to
aggregate cache capacity across cores and chips and
improve performance. This paper describes an automated
scheme that applies data spreading to various types of
loops. Experiments with a set of SPEC2000, SPEC2006,
NAS, and microbenchmark workloads show that data
spreading can provide speedups of over 2x, averaging 17\%
for the SPEC and NAS applications on two systems. In
addition, despite using more cores for the same
computation, data spreading actually saves power since
it reduces access to DRAM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "chip multiprocessors; compilers; single-thread
performance",
}
@Article{Sartor:2010:ZRD,
author = "Jennifer B. Sartor and Stephen M. Blackburn and Daniel
Frampton and Martin Hirzel and Kathryn S. McKinley",
title = "{Z}-rays: divide arrays and conquer speed and
flexibility",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "471--482",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806649",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Arrays are the ubiquitous organization for indexed
data. Throughout programming language evolution,
implementations have laid out arrays contiguously in
memory. This layout is problematic in space and time.
It causes heap fragmentation, garbage collection pauses
in proportion to array size, and wasted memory for
sparse and over-provisioned arrays. Because of array
virtualization in managed languages, an array layout
that consists of indirection pointers to fixed-size
discontiguous memory blocks can mitigate these problems
transparently. This design however incurs significant
overhead, but is justified when real-time deadlines and
space constraints trump performance.\par
This paper proposes {\em z-rays}, a discontiguous array
design with flexibility and efficiency. A z-ray has a
spine with indirection pointers to fixed-size memory
blocks called {\em arraylets}, and uses five
optimizations: (1) inlining the first N array bytes
into the spine, (2) lazy allocation, (3) zero
compression, (4) fast array copy, and (5) arraylet
copy-on-write. Whereas discontiguous arrays in prior
work improve responsiveness and space efficiency,
z-rays combine time efficiency and flexibility. On
average, the best z-ray configuration performs within
12.7\% of an unmodified Java Virtual Machine on 19
benchmarks, whereas previous designs have {\em two to
three times\/} higher overheads. Furthermore, language
implementers can configure z-ray optimizations for
various design goals. This combination of performance
and flexibility creates a better building block for
past and future array optimization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "arraylets; arrays; compression; heap; z-rays",
}
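
To fix ideas on the z-ray layout in the entry above: a spine holds
indirection pointers to fixed-size arraylets, the first few elements
are inlined into the spine, and arraylets are allocated lazily (which
also yields zero compression: an unallocated arraylet reads as
zeros). A simplified Python sketch; real z-rays live inside a JVM
heap and also implement fast copy and copy-on-write, omitted here:

  class ZRay:
      ARRAYLET = 4  # elements per arraylet (toy size)
      INLINE = 8    # "first N" elements inlined into the spine

      def __init__(self, length):
          self.length = length
          self.prefix = [0] * min(length, self.INLINE)
          blocks = max(0, -(-(length - self.INLINE) // self.ARRAYLET))
          self.spine = [None] * blocks  # lazy: no arraylets allocated

      def _locate(self, i):
          if not 0 <= i < self.length:
              raise IndexError(i)
          j = i - self.INLINE
          return j // self.ARRAYLET, j % self.ARRAYLET

      def __getitem__(self, i):
          if 0 <= i < len(self.prefix):
              return self.prefix[i]
          block, off = self._locate(i)
          a = self.spine[block]
          return 0 if a is None else a[off]  # zero compression

      def __setitem__(self, i, v):
          if 0 <= i < len(self.prefix):
              self.prefix[i] = v
              return
          block, off = self._locate(i)
          if self.spine[block] is None:      # lazy allocation
              self.spine[block] = [0] * self.ARRAYLET
          self.spine[block][off] = v

  z = ZRay(100)
  z[0], z[50] = 7, 9
  assert (z[0], z[50], z[99]) == (7, 9, 0)
  assert sum(a is not None for a in z.spine) == 1  # one arraylet live
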
@Article{Acar:2010:TDT,
author = "Umut A. Acar and Guy Blelloch and Ruy Ley-Wild and
Kanat Tangwongsan and Duru Turkoglu",
title = "Traceable data types for self-adjusting computation",
journal = j-SIGPLAN,
volume = "45",
number = "6",
pages = "483--496",
month = jun,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806596.1806650",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:53:18 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Self-adjusting computation provides an evaluation
model where computations can respond automatically to
modifications to their data by using a mechanism for
propagating modifications through the computation.
Current approaches to self-adjusting computation
guarantee correctness by recording dependencies in a
trace at the granularity of individual memory
operations. Tracing at the granularity of memory
operations, however, has some limitations: it can be
asymptotically inefficient (e.g., compared to optimal
solutions) because it cannot take advantage of
problem-specific structure, it requires keeping a large
computation trace (often proportional to the runtime of
the program on the current input), and it introduces
moderately large constant factors in practice.\par
In this paper, we extend dependence-tracing to work at
the granularity of the query and update operations of
arbitrary (abstract) data types, instead of just reads
and writes on memory cells. This can significantly
reduce the number of dependencies that need to be kept
in the trace and followed during an update. We define
an interface for supporting a traceable version of a
data type, which reports the earliest query that
depends on (is changed by) revising operations back in
time, and implement several such structures, including
priority queues, queues, dictionaries, and counters. We
develop a semantics for tracing, extend an existing
self-adjusting language, $\Delta$ML, and its
implementation to support traceable data types, and
present an experimental evaluation by considering a
number of benchmarks. Our experiments show dramatic
improvements on space and time, sometimes by as much as
two orders of magnitude.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "self-adjusting computation; traceable data types",
}
@Article{Chen:2010:TTT,
author = "Peter M. Chen",
title = "Transistors to toys: teaching systems to freshmen",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "1--2",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735997.1735998",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "How should we introduce students to the art of system
building, and when are students ready to start
designing and building interesting systems? In this
talk, I describe an experimental course at the
University of Michigan that teaches systems to freshmen
by having them conceive of, design, and build the
hardware and software of a microprocessor-based
educational toy. Students in this course build their
own microprocessor on an FPGA using a hardware
description language. They then write the complete
software stack for their toy in assembly language,
including device drivers for numerous I/O devices, a
simple file system, a graphical user interface, digital
audio processing, and application software. By building
a substantial system involving hardware, system
software, and application software, students gain an
appreciation for the complexity and beauty of building
computing systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "education",
}
@Article{Pohle:2010:CWM,
author = "Aaron Pohle and Bj{\"o}rn D{\"o}bel and Michael
Roitzsch and Hermann H{\"a}rtig",
title = "Capability wrangling made easy: debugging on a
microkernel with {{\tt valgrind}}",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "3--12",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837854.1736001",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Not all operating systems are created equal.
In contrast to traditional monolithic kernels, there is
a class of systems called microkernels, more prevalent
in embedded systems like cellphones, chip cards, or
real-time controllers. These kernels offer an
abstraction very different from the classical POSIX
interface. The resulting unfamiliarity for programmers
complicates development and debugging. Valgrind is a
well-known debugging tool that virtualizes execution to
perform dynamic binary analysis. However, it assumes it
is running on a POSIX-like kernel and closely interacts
with the system to control execution. In this paper we
analyze how to adapt Valgrind to a non-POSIX
environment and describe our port to the Fiasco.OC
microkernel. Additionally, we analyze bug classes that
are indigenous to capability systems and show how
Valgrind's flexibility can be leveraged to create
custom debugging tools detecting these errors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "capability; l4; microkernel; valgrind",
}
@Article{Chow:2010:MSR,
author = "Jim Chow and Dominic Lucchetti and Tal Garfinkel and
Geoffrey Lefebvre and Ryan Gardner and Joshua Mason and
Sam Small and Peter M. Chen",
title = "Multi-stage replay with {Crosscut}",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "13--24",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837854.1736002",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Deterministic record-replay has many useful
applications, ranging from fault tolerance and
forensics to reproducing and diagnosing bugs. When
choosing a record-replay solution, the system
administrator must choose a priori how comprehensively
to record the execution and at what abstraction level
to record it. Unfortunately, these choices may not
match well with how the recording is eventually used. A
recording may contain too little information to support
the end use of replay, or it may contain more sensitive
information than is allowed to be shown to the end user
of replay. Similarly, fixing the abstraction level at
the time of recording often leads to a semantic
mismatch with the end use of replay.\par
This paper describes how to remedy these problems by
adding customizable replay stages to create
special-purpose logs for the end users of replay. Our
system, called Crosscut, allows replay logs to be
'sliced' along time and abstraction boundaries. Using
this approach, users can create slices that include
only the processes, applications, or components of
interest, excluding parts that handle sensitive data.
Users can also retarget the abstraction level of the
replay log to higher-level platforms, such as Perl or
Valgrind. Execution can then be augmented with
additional analysis code at replay time, without
disturbing the replayed components in the slice.
Crosscut thus uses replay itself to transform logs into
a more efficient, secure, and usable form for
replay-based applications.\par
Our current Crosscut prototype builds on VMware
Workstation's record-replay capabilities, and supports
a variety of different replay environments. We show how
Crosscut can create slices of only the parts of the
computation of interest and thereby avoid leaking
sensitive information, and we show how to retarget the
abstraction level of the log to enable more convenient
use during replay debugging.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "design; experimentation; performance; replay;
security; virtual machines",
}
@Article{Huang:2010:OCD,
author = "Yijian Huang and Haibo Chen and Binyu Zang",
title = "Optimizing crash dump in virtualized environments",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "25--36",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837854.1736003",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Crash dump, or core dump, is the typical way to save a
memory image on system crash for future offline
debugging and analysis. However, for typical server
machines with likely abundant memory, the time of core
dump can significantly increase the mean time to repair
(MTTR) by delaying the reboot-based recovery, while not
dumping the failure context for analysis would risk
recurring crashes on the same problems.\par
In this paper, we propose several optimization
techniques for core dump in virtualized environments,
in order to shorten the MTTR of consolidated virtual
machines during crashes. First, we parallelize the
process of crash dump and the process of rebooting the
crashed VM, by dynamically reclaiming and allocating
memory between the crashed VM and the newly spawned VM.
Second, we use the virtual machine management layer to
introspect the critical data structures of the crashed
VM to filter out the dump of unused memory. Finally, we
implement disk I/O rate control between core dump and
the newly spawned VM according to user-tuned rate
control policy to balance the time of crash dump and
quality of services in the recovery VM.\par
We have implemented a working prototype, Vicover, that
optimizes core dump on system crash of a virtual
machine in Xen, to minimize the MTTR of core dump and
recovery as a whole. In our experiment on a virtualized
TPC-W server, Vicover shortens the downtime caused by
crash dump by around 5X.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "core dump; parallel core dump; virtual machines",
}
@Article{Hunt:2010:LBS,
author = "Galen C. Hunt",
title = "Looking beyond a singularity",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "37--38",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837854.1735999",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "How does one build a truly dependable software system?
Seven years ago, Microsoft Research started the
Singularity project to answer this question. The
premise was to start with the best known software
development tools and to build a new kind of operating
system from the ground up. The operating system was to
be both an output artifact and a laboratory for the
research. Portions of the code and ideas have been
incorporated into three separate Microsoft operating
systems so far. I will give a brief overview of
Singularity planned and built, then describe what we
learned, both positive and negative. I will speculate
on OS futures including current research to build an
operating system in which every last assembly
instruction has been verified for type safety, a system
for truly mobile computation, and new tools for
automatically restructuring large software systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "sing\#; singularity; software-isolated processes
(sips)",
}
@Article{Titzer:2010:ICR,
author = "Ben L. Titzer and Thomas W{\"u}rthinger and Doug Simon
and Marcelo Cintra",
title = "Improving compiler-runtime separation with {XIR}",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "39--50",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837854.1736005",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Intense research on virtual machines has highlighted
the need for flexible software architectures that allow
quick evaluation of new design and implementation
techniques. The interface between the compiler and
runtime system is a principal factor in the flexibility
of both components and is critical to enabling rapid
pursuit of new optimizations and features. Although
many virtual machines have demonstrated modularity for
many components, significant dependencies often remain
between the compiler and the runtime system components
such as the object model and memory management system.
This paper addresses this challenge with a carefully
designed strict compiler-runtime interface and the XIR
language. Instead of the compiler backend lowering
object operations to machine operations using
hard-wired runtime-specific logic, XIR allows the
runtime system to implement this logic, simultaneously
simplifying and separating the backend from
runtime-system details. In this paper we describe the
design and implementation of this compiler-runtime
interface and the XIR language in the C1X dynamic
compiler, a port of the HotSpot{\TM} Client compiler. Our
results show a significant reduction in backend
complexity with XIR and an overall reduction in the
compiler-runtime interface complexity while still
generating comparable quality code with only minor
impact on compilation time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compilers; intermediate representations; java; JIT;
lowering; object model; register allocation; runtime
interface; software architecture; virtual machines",
}
@Article{Geoffray:2010:VSM,
author = "Nicolas Geoffray and Ga{\"e}l Thomas and Julia Lawall
and Gilles Muller and Bertil Folliot",
title = "{VMKit}: a substrate for managed runtime
environments",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "51--62",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735997.1736006",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Managed Runtime Environments (MREs), such as the JVM
and the CLI, form an attractive environment for program
execution, by providing portability and safety, via the
use of a bytecode language and automatic memory
management, as well as good performance, via
just-in-time (JIT) compilation. Nevertheless,
developing a fully featured MRE, including, e.g., a
garbage collector and a JIT compiler, is a Herculean
task. As a result, new languages cannot easily take
advantage of the benefits of MREs, and it is difficult
to experiment with extensions of existing MRE based
languages.\par
This paper describes and evaluates VMKit, a first
attempt to build a common substrate that eases the
development of high-level MREs. We have successfully
used VMKit to build two MREs: a Java Virtual Machine
and a Common Language Runtime. We provide an extensive
study of the lessons learned in developing this
infrastructure, and assess the ease of implementing new
MREs or MRE extensions and the resulting performance.
In particular, it took one of the authors only one
month to develop a Common Language Runtime using VMKit.
VMKit furthermore has performance comparable to the
well-established open-source MREs Cacao, Apache Harmony
and Mono, and is 1.2 to 3 times slower than JikesRVM on
most of the DaCapo benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "just in time compiler; virtual machine; VMKit",
}
@Article{Zhang:2010:NSS,
author = "Qing Zhang and John McCullough and Justin Ma and Nabil
Schear and Michael Vrable and Amin Vahdat and Alex
C. Snoeren and Geoffrey M. Voelker and Stefan Savage",
title = "{Neon}: system support for derived data management",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "63--74",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735997.1736008",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern organizations face increasingly complex
information management requirements. A combination of
commercial needs, legal liability and regulatory
imperatives has created a patchwork of mandated
policies. Among these, personally identifying customer
records must be carefully access-controlled, sensitive
files must be encrypted on mobile computers to guard
against physical theft, and intellectual property must
be protected from both exposure and 'poisoning.'
However, enforcing such policies can be quite difficult
in practice since users routinely share data over
networks and derive new files from these
inputs--incidentally laundering any policy
restrictions. In this paper, we describe a virtual
machine monitor system called Neon that transparently
labels derived data using byte-level 'tints' and tracks
these labels end to end across commodity applications,
operating systems and networks. Our goal with Neon is
to explore the viability and utility of transparent
information flow tracking within conventional networked
systems when used in the manner in which they were
intended. We demonstrate that this mechanism allows the
enforcement of a variety of data management policies,
including data-dependent confinement, mandatory I/O
encryption, and intellectual property management.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "difc; memory tainting; qemu; virtualization; xen",
}
@Article{Ye:2010:EES,
author = "Lei Ye and Gen Lu and Sushanth Kumar and Chris Gniady
and John H. Hartman",
title = "Energy-efficient storage in virtual machine
environments",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "75--84",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735997.1736009",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Current trends in increasing storage capacity and
virtualization of resources, combined with the need for
energy efficiency, pose a challenging task for
system designers. Previous studies have suggested many
approaches to reduce hard disk energy dissipation in
native OS environments; however, those mechanisms do
not perform well in virtual machine environments
because a virtual machine (VM) and the virtual machine
monitor (VMM) that runs it have different semantic
contexts. This paper explores the disk I/O activities
between VMM and VMs using trace-driven simulation to
understand the I/O behavior of the VM system.
Subsequently, this paper proposes three mechanisms to
address the isolation between VMM and VMs, and increase
the burstiness of hard disk accesses to increase energy
efficiency of a hard disk. Compared to standard
shutdown mechanisms, with eight VMs the proposed
mechanisms reduce disk spin-ups, increase the disk
sleep time, and reduce energy consumption by 14.8\%
with only a 0.5\% increase in execution time. We
implemented the proposed mechanisms in Xen and
validated our simulation results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "energy management; storage system; virtual machine",
}
@Article{Kazempour:2010:AAA,
author = "Vahid Kazempour and Ali Kamali and Alexandra
Fedorova",
title = "{AASH}: an asymmetry-aware scheduler for hypervisors",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "85--96",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837854.1736011",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Asymmetric multicore processors (AMP) consist of cores
exposing the same instruction-set architecture (ISA)
but varying in size, frequency, power consumption and
performance. AMPs were shown to be more power efficient
than conventional symmetric multicore processors, and
it is therefore likely that future multicore systems
will include cores of different types. AMPs derive
their efficiency from core specialization: instruction
streams can be assigned to run on the cores best suited
to their demands for architectural resources. System
efficiency is improved as a result. To perform
effective matching of threads to cores, the thread
scheduler must be asymmetry-aware; and while
asymmetry-aware schedulers for operating systems are a
well studied topic, asymmetry-awareness in hypervisors
has not been addressed. A hypervisor must be
asymmetry-aware to enable proper functioning of
asymmetry-aware guest operating systems; otherwise they
will be ineffective in virtual environments.
Furthermore, a hypervisor must ensure that asymmetric
cores are shared among multiple guests in a fair
fashion or in accordance with their
priorities.\par
This work implements, for the first time, the simple
changes to the hypervisor scheduler required to make it
asymmetry-aware, and evaluates the benefits and
overheads of these asymmetry-aware mechanisms. Our
evaluation was performed using the open-source Xen
hypervisor on a real multicore system where
asymmetry was emulated via CPU frequency scaling. We
compared the asymmetry-aware hypervisor to default Xen.
Our results indicate that asymmetry support can be
implemented with low overheads, and resulting
performance improvements can be significant, reaching
up to 36\% in our experiments. Most performance
improvements are derived from the fact that an
asymmetry-aware hypervisor ensures that the fast cores
do not go idle before slow cores and from the fact that
it maps virtual cores to physical cores for
asymmetry-aware guests according to the guest's
expectations. Other benefits from asymmetry awareness
are fairer sharing of computing resources among VMs and
more stable execution times.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "asymmetric; heterogeneous; hypervisor; multicore
processors; scheduling algorithms; virtual machine
monitor",
}
@Article{Lee:2010:SSR,
author = "Min Lee and A. S. Krishnakumar and P. Krishnan and
Navjot Singh and Shalini Yajnik",
title = "Supporting soft real-time tasks in the {Xen}
hypervisor",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "97--108",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735997.1736012",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Virtualization technology enables server consolidation
and has given an impetus to low-cost green data
centers. However, current hypervisors do not provide
adequate support for real-time applications, and this
has limited the adoption of virtualization in some
domains. Soft real-time applications, such as
media-based ones, are impeded by components of
virtualization including low-performance virtualization
I/O, increased scheduling latency, and shared-cache
contention. The virtual machine scheduler is central to
all these issues. The goal in this paper is to adapt
the virtual machine scheduler to be more soft-real-time
friendly.\par
We improve two aspects of the VMM scheduler -- managing
scheduling latency as a first-class resource and
managing shared caches. We use enterprise IP telephony
as an illustrative soft real-time workload and design a
scheduler S that incorporates the knowledge of soft
real-time applications in {\em all\/} aspects of the
scheduler to support responsiveness. For this we first
define a {\em laxity\/} value that can be interpreted
as the target scheduling latency that the workload
desires. The load balancer is also designed to minimize
the latency for real-time tasks. For cache management,
we take cache affinity into account for real-time tasks
and load-balance accordingly to prevent cache
thrashing. We measured cache misses and demonstrated
that cache management is essential for soft real-time
tasks. Although our scheduler S employs a different
design philosophy, interestingly enough it can be
implemented with simple modifications to the Xen
hypervisor's credit scheduler. Our experiments
demonstrate that the Xen scheduler with our
modifications can support soft real-time guests well,
without penalizing non-real-time domains.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "enterprise telephony workloads; laxity; server
consolidation; virtualization; xen",
}
@Article{Odaira:2010:ERT,
author = "Rei Odaira and Kazunori Ogata and Kiyokuni Kawachiya
and Tamiya Onodera and Toshio Nakatani",
title = "Efficient runtime tracking of allocation sites in
{Java}",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "109--120",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837854.1736014",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Tracking the allocation site of every object at
runtime is useful for reliable, optimized Java. To be
used in production environments, the tracking must be
accurate with minimal speed loss. Previous approaches
suffer from performance degradation due to the
additional field added to each object or track the
allocation sites only probabilistically. We propose two
novel approaches to track the allocation sites of every
object in Java with only a 1.0\% slow-down on average.
Our first approach, the {\em
Allocation-Site-as-a-Hash-code (ASH) Tracker}, encodes
the allocation site ID of an object into the hash code
field of its header by regarding the ID as part of the
hash code. ASH Tracker avoids an excessive increase in
hash code collisions by dynamically shrinking the
bit-length of the ID as more and more objects are
allocated at that site. For those Java VMs without the
hash code field, our second approach, the {\em
Allocation-Site-via-a-Class-pointer (ASC) Tracker},
makes the class pointer field in an object header refer
to the allocation site structure of the object, which
in turn points to the actual class structure. ASC
Tracker mitigates the indirection overhead by
constant-class-field duplication and allocation-site
equality checks. While a previous approach of adding a
4-byte field caused up to 14.4\% and an average 5\%
slowdown, both ASH and ASC Trackers incur at most a
2.0\% and an average 1.0\% loss. We demonstrate the
usefulness of our low-overhead trackers by an
allocation-site-aware memory leak detector and
allocation-site-based pretenuring in generational GC.
Our pretenuring achieved on average 1.8\% and up to
11.8\% speedups in SPECjvm2008.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "allocation site; hash code; memory allocation",
}
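
The ASH encoding described above is ordinary bit arithmetic: the
allocation-site ID occupies the low bits of the object's hash code
field while the rest stays (pseudo)random, so the site is recoverable
from any object's identity hash. A hedged Python sketch with an
assumed fixed ID width (the actual tracker shrinks the per-site ID
bit-length as allocations grow, which is omitted here):

  import random

  SITE_BITS = 10  # assumed ID width for this sketch

  def make_hash_field(site_id):
      assert 0 <= site_id < (1 << SITE_BITS)
      rand = random.getrandbits(32 - SITE_BITS)
      return (rand << SITE_BITS) | site_id  # hash code includes the ID

  def allocation_site(hash_field):
      return hash_field & ((1 << SITE_BITS) - 1)

  h = make_hash_field(42)
  assert allocation_site(h) == 42
  # Objects from one site share their low SITE_BITS, so per-site hash
  # entropy drops from 32 to 22 bits; keeping the ID narrow is what
  # bounds the extra hash-code collisions.
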
@Article{Tatsubori:2010:EJT,
author = "Michiaki Tatsubori and Akihiko Tozawa and Toyotaro
Suzumura and Scott Trent and Tamiya Onodera",
title = "Evaluation of a just-in-time compiler retrofitted for
{PHP}",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "121--132",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735997.1736015",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmers who develop Web applications often use
dynamic scripting languages such as Perl, PHP, Python,
and Ruby. For general-purpose scripting usage,
interpreter-based implementations are efficient and
popular, but server-side usage for Web application
development offers an opportunity to significantly
enhance Web server throughput. This paper summarizes a
study of the optimization of PHP script processing. We
developed a PHP processor, P9, by adapting an existing
production-quality just-in-time (JIT) compiler for a
Java virtual machine, for which optimization
technologies have been well-established, especially for
server-side application. This paper describes and
contrasts microbenchmarks and SPECweb2005 benchmark
results for a well-tuned configuration of a traditional
PHP interpreter and our JIT compiler-based
implementation, P9. Experimental results with the
microbenchmarks show a 2.5-9.5x advantage with P9, and
the SPECweb2005 measurements show about 20-30\%
improvements. These results show that the acceleration
of dynamic scripting language processing does matter in
a realistic Web application server environment. CPU
usage profiling shows our simple JIT compiler
introduction reduces the PHP core runtime overhead from
45\% to 13\% for a SPECweb2005 scenario, implying that
further improvements of dynamic compilers would provide
little additional return unless other major overheads
such as heavy memory copy between the language runtime
and Web server frontend are reduced.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic scripting languages; just-in-time compiler;
php",
}
@Article{Namjoshi:2010:NOP,
author = "Manjiri A. Namjoshi and Prasad A. Kulkarni",
title = "Novel online profiling for virtual machines",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "133--144",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735997.1736016",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Application {\em profiling\/} is a popular technique
to improve program performance based on its behavior.
{\em Offline\/} profiling, although beneficial for
several applications, fails in cases where prior
program runs may not be feasible, or if changes in
input cause the profile to not match the behavior of
the actual program run. Managed languages, like Java
and C\#, provide a unique opportunity to overcome the
drawbacks of offline profiling by generating the
profile information online during the current program
run. Indeed, online profiling is extensively used in
current VMs, especially during selective compilation to
improve program {\em startup\/} performance, as well as
during other feedback-directed optimizations.\par
In this paper we illustrate the drawbacks of the
current {\em reactive\/} mechanism of online profiling
during selective compilation. Current VM profiling
mechanisms are slow -- thereby delaying associated
transformations, and estimate future behavior based on
the program's immediate past -- leading to potential
misspeculation that limits the benefits of compilation.
We show that these drawbacks produce an average
performance loss of over 14.5\% on our set of benchmark
programs, over an {\em ideal offline\/} approach that
accurately compiles the hot methods early. We then
propose and evaluate the potential of a novel strategy
to achieve similar performance benefits with an online
profiling approach. Our new online profiling strategy
uses early determination of loop iteration bounds to
predict future method hotness. We explore and present
promising results on the potential, feasibility, and
other issues involved for the successful implementation
of this approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "java; online profiling; virtual machines",
}
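The proposed strategy replaces a purely reactive hotness counter with
a prediction made as soon as a loop's iteration bound is known. The
C sketch below is ours (the threshold, names, and cost model are
assumptions); it shows the decision such early bound determination
enables: a method entering a loop with a large known trip count is
queued for compilation immediately rather than after thousands of
observed counter ticks.

    #include <stdio.h>

    /* Hypothetical hotness threshold: a reactive profiler would
       wait for an invocation/back-edge counter to reach it. */
    #define HOTNESS_THRESHOLD 10000L

    /* Predictive variant: decide at loop entry, once the iteration
       bound is known, whether total work will cross the threshold. */
    static int should_compile_early(long trip_count, long body_cost) {
        return trip_count * body_cost >= HOTNESS_THRESHOLD;
    }

    int main(void) {
        /* A loop known to run 50000 cheap iterations is predicted
           hot immediately, avoiding the delayed-compilation loss
           the paper measures for reactive profiling. */
        printf("%d\n", should_compile_early(50000L, 1L));
        return 0;
    }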
@Article{Guha:2010:DPS,
author = "Apala Guha and Kim hazelwood and Mary Lou Soffa",
title = "{DBT} path selection for holistic memory efficiency
and performance",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "145--156",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837854.1736018",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic binary translators(DBTs) provide powerful
platforms for building dynamic program monitoring and
adaptation tools. DBTs, however, have high memory
demands because they cache translated code and
auxiliary code to a software code cache and must also
maintain data structures to support the code cache. The
high memory demands make it difficult for
memory-constrained embedded systems to take advantage
of DBT-based tools. Previous research on DBT memory
management focused on the translated code and auxiliary
code only. However, we found that data structures are
comparable to the code cache in size. We show that the
translated code size, auxiliary code size and the data
structure size interact in a complex manner, depending
on the path selection (trace selection and link
formation) strategy. Therefore, holistic memory
efficiency (comprising translated code, auxiliary code
and data structures) cannot be improved by focusing on
the code cache only. In this paper, we use path
selection for improving holistic memory efficiency
which in turn impacts performance in memory-constrained
environments. Although there has been previous research
on path selection, such research only considered
performance in memory-unconstrained
environments.\par
The challenge for holistic memory efficiency is that
the path selection strategy results in complex
interactions between the memory demand components.
Also, individual aspects of path selection and the
holistic memory efficiency may impact performance in
complex ways. We explore these interactions to motivate
path selection targeting holistic memory demand. We
enumerate all the aspects involved in a path selection
design and evaluate a comprehensive set of approaches
for each aspect. Finally, we propose a path selection
strategy that reduces memory demands by 20\% and at the
same time improves performance by 5-20\% compared to an
industrial-strength DBT.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic binary translation; embedded systems; memory
management; path selection; virtual machines",
}
@Article{Kondoh:2010:DBT,
author = "Goh Kondoh and Hideaki Komatsu",
title = "Dynamic binary translation specialized for embedded
systems",
journal = j-SIGPLAN,
volume = "45",
number = "7",
pages = "157--166",
month = jul,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735997.1736019",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:01 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes the design and implementation of
a novel dynamic binary translation technique
specialized for embedded systems. Virtual platforms
have been widely used to develop embedded software and
dynamic binary translation is essential to boost their
speed in simulations. However, unlike application
simulation, the code generated for systems simulation
is still slow because the simulator must replicate all
of the functions of the target hardware. Embedded
systems, which focus on providing one or a few
functions, utilize only a small portion of the
processor's features most of the time. For example,
they may use a Memory Management Unit (MMU) in a
processor to map physical memory to effective
addresses, but they may not need paged memory support
as in an OS. We can exploit this to specialize the
dynamically translated code for more
performance.\par
We built a specialization framework on top of a
functional simulator with a dynamic binary translator.
Using the framework, we implemented three specializers
for an MMU, bi-endianness, and register banks.
Experiments with the EEMBC1.1 benchmark showed that the
speed of the specialized code was up to 39\% faster
than the unspecialized code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic binary translation; embedded systems; partial
evaluation; specialization",
}
@Article{Barabash:2010:TGC,
author = "Katherine Barabash and Erez Petrank",
title = "Tracing garbage collection on highly parallel
platforms",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "1--10",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837855.1806653",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The pervasiveness of multiprocessor and multicore
hardware and the rising level of available parallelism
are radically changing the computing landscape. Can
software deal with tomorrow's potential higher
parallelism? In this paper we study this issue from the
garbage collection perspective. In particular, we
investigate the scalability of parallel heap tracing,
which stands at the core of the garbage collection
activity. Heap shapes can be sequential in nature, and
prevent the collector from scaling the trace. We start
by proposing the idealized trace utilization as a
measure for evaluating the scalability of a
given heap shape. We then examine standard Java
benchmarks and evaluate the existence of non-scalable
object-graph shapes in their execution. Next, we
propose and implement a prototype of garbage collection
techniques that attempt to ameliorate the object-graph
shape problem. Finally, we measure and report their
efficacy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "garbage collection; memory management; parallel
garbage collection; runtime systems",
}
@Article{Siebert:2010:CPR,
author = "Fridtjof Siebert",
title = "Concurrent, parallel, real-time garbage-collection",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "11--20",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837855.1806654",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the current developments in CPU implementations,
it becomes obvious that ever more parallel multicore
systems will be used even in embedded controllers that
require real-time guarantees. When garbage collection
is used in these systems, parallel and concurrent
garbage collection brings important performance
advantages in the average case. In a real-time system,
however, guarantees on the GC's performance in the
worst case are required.\par
This paper explains how the single-CPU real-time GC of
the Java implementation JamaicaVM was changed to make
it a hard real-time garbage collector that is parallel
and concurrent. Parallel means that an arbitrary number
of CPUs may perform GC work in parallel, while
concurrent means that the GC work can be performed
concurrently to the application code without
pre-empting the application. In addition, the single
units of work that this garbage collector has to
perform are very small and uniform and the total amount
of GC work is bounded by a function of the heap size,
such that it becomes possible for any application that
has a bounded amount of reachable memory to run the GC
work such that sufficient GC progress can be ensured
for the application never to run out of heap space.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "concurrent; garbage collection; java; multicore;
parallel; real-time",
}
@Article{Anderson:2010:OPN,
author = "Todd A. Anderson",
title = "Optimizations in a private nursery-based garbage
collector",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "21--30",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837855.1806655",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes a garbage collector designed
around the use of permanent, private, thread-local
nurseries and is principally oriented towards
functional languages. We try to maximize the cache hit
rate by having threads continually reuse their
individual private nurseries. These private nurseries
operate in such a way that they can be garbage
collected independently of other threads, which creates
low collection pause times. Objects which survive
thread-local collections are moved to a mature
generation that can be collected either concurrently or
in a stop-the-world fashion. We describe several
optimizations (including two dynamic control parameter
adaptation schemes) related to garbage collecting the
private nurseries and to our concurrent collector, some
of which are made possible when the language provides
mutability information. We tested our collector against
six benchmarks and saw single-threaded performance
improvements in the range of 5-74\%. We also saw a 10x
increase (for 24 processors) in scalability for one
parallel benchmark that had previously been
memory-bound.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "functional languages; garbage collection",
}
@Article{Nagarakatte:2010:CCE,
author = "Santosh Nagarakatte and Jianzhou Zhao and Milo M. K.
Martin and Steve Zdancewic",
title = "{CETS}: compiler enforced temporal safety for {C}",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "31--40",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837855.1806657",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Temporal memory safety errors, such as dangling
pointer dereferences and double frees, are a prevalent
source of software bugs in unmanaged languages such as
C. Existing schemes that attempt to retrofit temporal
safety for such languages have high runtime overheads
and/or are incomplete, thereby limiting their
effectiveness as debugging aids. This paper presents
CETS, a compile-time transformation for detecting all
violations of temporal safety in C programs. Inspired
by existing approaches, CETS maintains a unique
identifier with each object, associates this metadata
with the pointers in a disjoint metadata space to
retain memory layout compatibility, and checks that the
object is still allocated on pointer dereferences. A
formal proof shows that this is sufficient to provide
temporal safety even in the presence of arbitrary casts
if the program contains no spatial safety violations.
Our CETS prototype employs both temporal check removal
optimizations and traditional compiler optimizations to
achieve a runtime overhead of just 48\% on average.
When combined with a spatial-checking system, the
average overall overhead is 116\% for complete memory
safety",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "c; dangling pointers; memory safety; temporal errors",
}
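CETS's identifier-based checking is a lock-and-key discipline: each
allocation gets a fresh key, the key is stored at a lock location,
and both travel with the pointer in a disjoint metadata space; a
dereference is legal only while the lock still holds the key. A
compressed C sketch of that invariant follows; carrying the metadata
in a struct and the names are our simplification (the real system
keeps metadata disjoint from the pointer representation and manages
lock locations specially).

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    typedef struct {
        void     *ptr;
        uint64_t  key;    /* unique id of this allocation */
        uint64_t *lock;   /* location holding the key while live */
    } safe_ptr;

    static uint64_t next_key = 1;

    static safe_ptr safe_malloc(size_t n) {
        safe_ptr p = { malloc(n), next_key++, malloc(sizeof(uint64_t)) };
        *p.lock = p.key;
        return p;
    }

    static void safe_free(safe_ptr p) {
        *p.lock = 0;      /* invalidates every copy of this pointer;
                             the lock word is deliberately not reused */
        free(p.ptr);
    }

    static void check_deref(safe_ptr p) {
        if (*p.lock != p.key) {       /* temporal check */
            fprintf(stderr, "temporal safety violation\n");
            exit(1);
        }
    }

    int main(void) {
        safe_ptr p = safe_malloc(16);
        check_deref(p);               /* passes */
        safe_free(p);
        check_deref(p);               /* dangling: aborts */
        return 0;
    }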
@Article{Vechev:2010:PPC,
author = "Martin Vechev and Eran Yahav and Greta Yorsh",
title = "{PHALANX}: parallel checking of expressive heap
assertions",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "41--50",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837855.1806658",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Unrestricted use of heap pointers makes software
systems difficult to understand and to debug. To
address this challenge, we developed PHALANX -- a
practical framework for dynamically checking expressive
heap properties such as ownership, sharing and
reachability. PHALANX uses novel parallel algorithms to
efficiently check a wide range of heap properties
utilizing the available cores.\par
The PHALANX runtime is implemented on top of IBM's Java
production virtual machine. This has enabled us to
apply our new techniques to real world software. We
checked expressive heap properties in various scenarios
and found the runtime support to be valuable for
debugging and program understanding. Further, our
experimental results on DaCapo and other benchmarks
indicate that evaluating heap queries using parallel
algorithms can lead to significant performance
improvements, often resulting in linear speedups as the
number of cores increases.\par
To encourage adoption by programmers, we extended an
existing JML compiler to translate expressive JML
assertions about the heap into their efficient
implementation provided by PHALANX. To debug her
program, a programmer can annotate it with expressive
heap assertions in JML, which are efficiently checked by
PHALANX.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "ownership; parallel garbage collector; virtual
machine",
}
@Article{Sewell:2010:MEA,
author = "Peter Sewell",
title = "Memory, an elusive abstraction",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "51--52",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806651.1806660",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multiprocessors are now ubiquitous. They provide an
abstraction of shared memory, accessible by
concurrently executing threads, which supports a wide
range of software. However, exactly what this key
abstraction is -- what the hardware designers
implement, and what programmers can depend on -- is
surprisingly elusive. In 1979, when articulating the
notion of sequential consistency (SC), Lamport wrote
'For some applications, achieving sequential
consistency may not be worth the price of slowing down
the processors.' [7], and indeed most major
multiprocessor families, including Alpha, ARM, Itanium,
Power, Sparc, and x86, do not provide the abstraction
of SC memory. Internally, they incorporate a range of
sophisticated optimisations which have various
programmer-visible effects. For some (such as Sparc)
these effects are captured in a well-defined relaxed
memory model, making it possible (if challenging) to
reason with confidence about the behaviour of
concurrent programs. For others, however, it has been
very unclear what a reasonable model is, despite
extensive research over the last three decades. In this
talk, I will reflect on the experience of my colleagues
and I in trying to establish usable models for x86
multiprocessors, where it appears that our x86-TSO
model suffices for common-case code [1-4], and for
Power and ARM multiprocessors, where we have models
that capture some but not all aspects of their
behaviour [5,6]. The underlying causes of these
difficulties are complex, including:\par
The programmer-observable relaxed-memory behaviour of a
multiprocessor is a whole-system property that arises
from the interaction between many complex aspects of
the processor implementation: speculative execution,
store buffering, cache protocol, and so forth.\par
Programs are executed (and tested) on specific
multiprocessor implementations, but processor vendors
attempt to document loose specifications to cover a
range of possible (past and future)
implementations.\par
Multiprocessor implementation details are typically
confidential and may change radically from one
implementation to another.\par
Vendor specifications suffer from the tension between
the need for loose specification, to preserve freedom
for such changes, and the need for tight specification,
to give strong properties to programmers.\par
All too often, loose specification has been achieved by
vague specification, using informal prose. When it
comes to subtle concurrent properties this is almost
inevitably ambiguous; it also makes it impossible (even
in principle) to test conformance between a processor
implementation and such a specification, let alone to
verify such a correspondence or to reason about
concurrent programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "relaxed memory models; semantics",
}
@Article{Petricek:2010:CHG,
author = "Tomas Petricek and Don Syme",
title = "Collecting {Hollywood}'s garbage: avoiding space-leaks
in composite events",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "53--62",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837855.1806662",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The reactive programming model is largely different to
what we're used to as we don't have full control over
the application's control flow. If we mix the
declarative and imperative programming style, which is
usual in the ML family of languages, the situation is
even more complex. It becomes easy to introduce
patterns where the usual garbage collector for objects
cannot automatically dispose all components that we
intuitively consider garbage.\par
In this paper we discuss a duality between the
definitions of garbage for {\em objects\/} and {\em
events}. We combine them into a single one, to specify
the notion of garbage for the reactive programming model in
a mixed functional/imperative language and we present a
formal algorithm for collecting garbage in this
environment.\par
Building on top of the theoretical model, we implement
a library for reactive programming that does not cause
leaks when used in the mixed declarative/imperative
model. The library allows us to safely combine both of
the reactive programming patterns. As a result, we can
take advantage of the clarity and simplicity of the
declarative approach as well as the expressivity of the
imperative model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "combinator libraries; duality; event-driven; garbage
collection; inversion of control; reactive
programming",
}
@Article{Tian:2010:SPU,
author = "Chen Tian and Min Feng and Rajiv Gupta",
title = "Speculative parallelization using state separation and
multiple value prediction",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "63--72",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806651.1806663",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the availability of chip multiprocessor (CMP) and
simultaneous multithreading (SMT) machines, extracting
thread level parallelism from a sequential program has
become crucial for improving performance. However, many
sequential programs cannot be easily parallelized due
to the presence of dependences. To solve this problem,
different solutions have been proposed. Some of them
make the optimistic assumption that such dependences
rarely manifest themselves at runtime. However, when
this assumption is violated, the recovery causes very
large overhead. Other approaches incur large
synchronization or computation overhead when resolving
the dependences. Consequently, for a loop with
frequently arising cross-iteration dependences,
previous techniques are not able to speed up the
execution. In this paper we propose a compiler
technique which uses state separation and multiple
value prediction to speculatively parallelize loops in
sequential programs that contain frequently arising
cross-iteration dependences. The key idea is to
generate multiple versions of a loop iteration based on
multiple predictions of values of variables involved in
cross-iteration dependences (i.e., live-in variables).
These speculative versions and the preceding loop
iteration are executed in separate memory states
simultaneously. After the execution, if one of these
versions is correct (i.e., its predicted values are
found to be correct), then we merge its state and the
state of the preceding iteration because the dependence
between the two iterations is correctly resolved. The
memory states of other incorrect versions are
completely discarded. Based on this idea, we further
propose a runtime adaptive scheme that not only gives a
good performance but also achieves better CPU
utilization. We conducted experiments on 10 benchmark
programs on a real machine. The results show that our
technique can achieve 1.7x speedup on average across
all used benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "multicore processors; speculative parallelization",
}
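The key idea above can be made concrete: while iteration i runs on
the main state, iteration i+1 runs K times under different predicted
live-in values, each against a separated private state, and the
version whose prediction matched iteration i's real output is merged
in. The C sketch below is ours and sequential for clarity; the loop
body, the two predictors, and merging by delta are illustrative
assumptions (real candidate versions would run on other cores, and
merging general state is what the paper's state separation
mechanism provides).

    #include <stdio.h>

    #define K 2                      /* number of predicted versions */

    typedef struct { long sum; } state_t;

    static long body(state_t *s, long live_in) {  /* one iteration */
        s->sum += live_in;
        return live_in + 3;                       /* next live-in */
    }

    int main(void) {
        state_t s = {0};
        long live_in = 1;
        for (int i = 0; i < 8; i += 2) {
            long pred[K] = { live_in + 3, live_in * 2 };
            state_t priv[K];
            long out[K];
            for (int v = 0; v < K; v++) {   /* speculative versions in
                                               separated (delta) state */
                priv[v].sum = 0;
                out[v] = body(&priv[v], pred[v]);
            }
            long actual = body(&s, live_in);  /* preceding iteration */
            int hit = -1;
            for (int v = 0; v < K; v++)
                if (pred[v] == actual) { hit = v; break; }
            if (hit >= 0) {                   /* merge matching state */
                s.sum += priv[hit].sum;
                live_in = out[hit];
            } else {                          /* all mispredicted */
                live_in = body(&s, actual);
            }
        }
        printf("sum=%ld\n", s.sum);  /* 92, same as a sequential run */
        return 0;
    }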
@Article{Ugawa:2010:IRB,
author = "Tomoharu Ugawa and Hideya Iwasaki and Taiichi Yuasa",
title = "Improved replication-based incremental garbage
collection for embedded systems",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "73--82",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806651.1806664",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We have developed an incremental compacting garbage
collector for embedded Java systems. The collector
divides the heap into equal-sized pages and uses
segregated free lists for fast allocation. Collectors
that have such a heap layout have a problem of
fragmentation in allocating objects larger than the
page size. We solve this problem by using the
replication-based incremental compaction. The compactor
evacuates all objects in one area, the evacuation area,
of the heap, thereby creating a large chunk of free
space. We developed an algorithm for choosing the
evacuation area that effectively cures fragmentation.
The compactor does not use any read-barriers. Instead,
it uses a technique similar to the replication-based
incremental copying collection. This needs forwarding
pointers for all evacuated objects. Rather than
introducing an extra field for each object, we use a
hash table to store forwarding pointers.\par
Evaluation of this garbage collector implemented in
Sun's J2ME Java Virtual Machine showed that all the
benchmarks used were able to run without memory
starvation using the heap sizes of only 151\%-286\% of
the maximum amount of live data plus 8 KB of the hash
table. Experiments on a desktop computer, though it is
not a platform for embedded systems, showed that the
maximum pause time was shorter than 200 &\#956;s, which
was comparable to that of our implementation of the
snapshot-at-the-beginning collector without compaction.
On an ARM processor, the runtime overhead was 1\%-16\%
with 8.0\% on average compared to the mark-sweep
collector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "compaction; embedded systems; fragmentation; garbage
collection; real-time garbage collection",
}
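Keeping forwarding pointers in a side hash table rather than in a
header field is the part of the design that avoids both per-object
space overhead and mutator read barriers: only the collector
consults the table while fixing up references into the evacuation
area. A minimal open-addressing sketch in C (the table size, the
probing scheme, and the names are our assumptions):

    #include <stdio.h>
    #include <stdint.h>

    #define SLOTS 1024u  /* illustrative; the paper reports 8 KB */

    static uintptr_t from_addr[SLOTS], to_addr[SLOTS];

    static unsigned slot(uintptr_t a) {
        return (unsigned)((a >> 3) % SLOTS);
    }

    static void record_forwarding(uintptr_t old, uintptr_t fresh) {
        unsigned i = slot(old);
        while (from_addr[i] != 0) i = (i + 1) % SLOTS; /* probe */
        from_addr[i] = old;
        to_addr[i] = fresh;
    }

    static uintptr_t forwarded(uintptr_t old) {
        for (unsigned i = slot(old); from_addr[i] != 0;
             i = (i + 1) % SLOTS)
            if (from_addr[i] == old) return to_addr[i];
        return old;                           /* not evacuated */
    }

    int main(void) {
        record_forwarding(0x1000, 0x8000);
        printf("%lx %lx\n", (unsigned long)forwarded(0x1000),
               (unsigned long)forwarded(0x2000)); /* 8000 2000 */
        return 0;
    }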
@Article{Hellyer:2010:LCW,
author = "Laurence Hellyer and Richard Jones and Antony L.
Hosking",
title = "The locality of concurrent write barriers",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "83--92",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806651.1806666",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent and incremental collectors require barriers
to ensure correct synchronisation between mutator and
collector. The overheads imposed by particular barriers
on particular systems have been widely studied.
Somewhat fewer studies have also compared barriers in
terms of their termination properties or the volume of
floating garbage they generate. Until now, the
consequences for locality of different barrier choices
has not been studied, although locality will be of
increasing importance for emerging architectures. This
paper provides a study of the locality of concurrent
write barriers, independent of the processor
architecture, virtual machine, compiler or garbage
collection algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "garbage collection; java; language implementation;
memory management",
}
@Article{Zhao:2010:EMS,
author = "Qin Zhao and Derek Bruening and Saman Amarasinghe",
title = "Efficient memory shadowing for 64-bit architectures",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "93--102",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806651.1806667",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Shadow memory is used by dynamic program analysis
tools to store metadata for tracking properties of
application memory. The efficiency of mapping between
application memory and shadow memory has substantial
impact on the overall performance of such analysis
tools. However, traditional memory mapping schemes that
work well on 32-bit architectures cannot easily port to
64-bit architectures due to the much larger 64-bit
address space.\par
This paper presents EMS64, an efficient memory
shadowing scheme for 64-bit architectures. By taking
advantage of application reference locality and unused
regions in the 64-bit address space, EMS64 provides a
fast and flexible memory mapping scheme without relying
on any underlying platform features or requiring any
specific shadow memory size. Our experiments show that
EMS64 is able to reduce the runtime shadow memory
translation overhead to 81\% on average, which almost
halves the overhead of the fastest 64-bit shadow memory
system we are aware of.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "dynamic optimization; shadow memory",
}
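On 64-bit platforms the mapping from an application address to its
shadow typically reduces to arithmetic into an otherwise-unused
address region; EMS64's contribution is choosing such regions
flexibly, guided by reference locality, instead of a platform-fixed
layout. The fixed-offset sketch below shows only that baseline
translation shape (the offset constant and names are illustrative,
not EMS64's actual layout):

    #include <stdio.h>
    #include <stdint.h>

    /* One shadow byte per application byte at a fixed displacement
       into an unused region of the 64-bit address space (sketch). */
    #define SHADOW_OFFSET 0x200000000000ULL

    static uint8_t *shadow_of(const void *app) {
        return (uint8_t *)((uintptr_t)app + SHADOW_OFFSET);
    }

    int main(void) {
        int x = 0;
        /* Translation is a single add; EMS64 keeps this cost while
           letting the displacement vary per region. */
        printf("app=%p shadow=%p\n", (void *)&x,
               (void *)shadow_of(&x));
        return 0;
    }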
@Article{Singer:2010:EGC,
author = "Jeremy Singer and Richard E. Jones and Gavin Brown and
Mikel Luj{\'a}n",
title = "The economics of garbage collection",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "103--112",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806651.1806669",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper argues that economic theory can improve our
understanding of memory management. We introduce the
{\em allocation curve}, as an analogue of the demand
curve from microeconomics. An allocation curve for a
program characterises how the amount of garbage
collection activity required during its execution
varies in relation to the heap size associated with
that program. The standard treatment of microeconomic
demand curves (shifts and elasticity) can be applied
directly and intuitively to our new allocation curves.
As an application of this new theory, we show how {\em
allocation elasticity\/} can be used to control the
heap growth rate for variable sized heaps in Jikes
RVM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "allocation curve; elasticity; garbage collection;
java; memory management; microeconomics",
}
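The microeconomic analogy can be computed directly: allocation
elasticity is the percentage change in collection work over the
percentage change in heap size, read off two points of a program's
allocation curve. A small arc-elasticity sketch in C (the
measurements are made up for illustration):

    #include <stdio.h>

    /* Arc elasticity e = (dG/G) / (dH/H), by analogy with price
       elasticity of demand; G is GC work, H is heap size. */
    static double elasticity(double h1, double g1,
                             double h2, double g2) {
        double dg = (g2 - g1) / ((g1 + g2) / 2.0);
        double dh = (h2 - h1) / ((h1 + h2) / 2.0);
        return dg / dh;
    }

    int main(void) {
        /* Hypothetical curve points: growing the heap from 64 MB
           to 96 MB cuts GC work from 1000 to 600 units. */
        printf("e = %.2f\n", elasticity(64.0, 1000.0, 96.0, 600.0));
        return 0;  /* prints e = -1.25: elastic, growth pays off */
    }

A magnitude above 1 marks the elastic region of the curve, where a
heap-growth policy like the one the authors add to Jikes RVM gets a
more-than-proportional reduction in collection work.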
@Article{Beg:2010:GTA,
author = "Mirza Beg and Peter van Beek",
title = "A graph theoretic approach to cache-conscious
placement of data for direct mapped caches",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "113--120",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806651.1806670",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Caches were designed to amortize the cost of memory
accesses by moving copies of frequently accessed data
closer to the processor. Over the years the increasing
gap between processor speed and memory access latency
has made the cache a bottleneck for program
performance. Enhancing cache performance has been
instrumental in speeding up programs. For this reason
several hardware and software techniques have been
proposed by researchers to optimize the cache for
minimizing the number of misses. Among these are
compile-time data placement techniques in memory which
improve cache performance. For the purpose of this
work, we concern ourselves with the problem of laying
out data in memory given the sequence of accesses on a
finite set of data objects such that cache-misses are
minimized. The problem has been shown to be hard to
solve optimally even if the sequence of data accesses
is known at compile time. In this paper we show that
given a direct-mapped cache, its size, and the data
access sequence, it is possible to identify the
instances where there are no conflict misses. We
describe an algorithm that can assign the data to cache
for minimal number of misses if there exists a way in
which conflict misses can be avoided altogether. We
also describe the implementation of a heuristic for
assigning data to cache for instances where the size of
the cache forces conflict misses. Experiments show that
our technique results in a 30\% reduction in the number
of cache misses compared to the original assignment.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "cache consciousness; cache optimization; data
placement in cache; memory management; offline
algorithms",
}
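Given the access sequence at compile time, the conflict structure of
a direct-mapped cache is easy to evaluate for any candidate
placement: each object's assigned block modulo the number of sets
picks its line, and a miss occurs whenever that line last held a
different object. The C sketch below is ours (sizes and names are
illustrative); it is the evaluation step such a placement algorithm
would iterate:

    #include <stdio.h>

    #define SETS 4  /* illustrative direct-mapped cache, 4 lines */

    /* Count misses of a placement (object -> block) on a trace. */
    static int misses(const int placement[], const int trace[],
                      int n) {
        int line[SETS], m = 0;
        for (int i = 0; i < SETS; i++) line[i] = -1;
        for (int i = 0; i < n; i++) {
            int set = placement[trace[i]] % SETS;
            if (line[set] != trace[i]) { line[set] = trace[i]; m++; }
        }
        return m;
    }

    int main(void) {
        int trace[] = {0, 1, 0, 1, 0, 1};
        int bad[]   = {0, 4};  /* both map to set 0: 6 misses */
        int good[]  = {0, 1};  /* distinct sets: 2 cold misses */
        printf("%d %d\n", misses(bad, trace, 6),
                          misses(good, trace, 6));
        return 0;
    }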
@Article{Albert:2010:PIM,
author = "Elvira Albert and Samir Genaim and Miguel
G{\'o}mez-Zamalloa",
title = "Parametric inference of memory requirements for
garbage collected languages",
journal = j-SIGPLAN,
volume = "45",
number = "8",
pages = "121--130",
month = aug,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1806651.1806671",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 8 17:55:48 MDT 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The accurate prediction of program's memory
requirements is a critical component in software
development. Existing heap space analyses either do not
take deallocation into account or adopt specific models
of garbage collectors which do not necessarily
correspond to the actual memory usage. We present a
novel approach to inferring upper bounds on memory
requirements of Java-like programs which is {\em
parametric\/} on the notion of {\em object lifetime},
i.e., on when objects become collectible. If objects
lifetimes are inferred by a reachability analysis, then
our analysis infers accurate upper bounds on the memory
consumption for a {\em reachability\/}-based garbage
collector. Interestingly, if object lifetimes are
inferred by a {\em heap liveness\/} analysis, then we
approximate the program minimal memory requirement,
i.e., the peak memory usage when using an optimal
garbage collector which frees objects as soon as they
become dead. The key idea is to integrate information
on object lifetimes into the process of generating the
{\em recurrence equations\/} which capture the memory
usage at the different program states. If the heap size
limit is set to the memory requirement inferred by our
analysis, it is ensured that execution will not exceed
the memory limit with the only assumption that garbage
collection works when the limit is reached. Experiments
on Java bytecode programs provide evidence of the
feasibility and accuracy of our analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "garbage collection; java bytecode; live heap space
analysis; low-level languages; peak memory
consumption",
}
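The recurrence-equation step above admits a compact illustration.
For a loop that allocates one object of size $s$ per iteration, a
schematic recurrence (ours, not the paper's), parametric on when the
previous iteration's object is freed, is:

\begin{align*}
  m(0) &= 0,\\
  m(n) &= s + \max\bigl(0,\; m(n-1) - \mathit{freed}\bigr).
\end{align*}

With a reachability-based collector that frees the previous
iteration's object, $\mathit{freed} = s$ and the bound collapses to
$m(n) = s$; with no deallocation, $\mathit{freed} = 0$ and
$m(n) = n \cdot s$. Plugging different lifetime analyses into
$\mathit{freed}$ is what makes the inference parametric.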
@Article{Gordon:2010:MMO,
author = "Michael J. C. Gordon",
title = "{ML}: metalanguage or object language?",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "1--2",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863545",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chapman:2010:GAL,
author = "James Chapman and Pierre-{\'E}variste Dagand and Conor
McBride and Peter Morris",
title = "The gentle art of levitation",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "3--14",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863547",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Vytiniotis:2010:FPE,
author = "Dimitrios Vytiniotis and Andrew J. Kennedy",
title = "Functional pearl: every bit counts",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "15--26",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863548",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Buisson:2010:RES,
author = "J{\'e}r{\'e}my Buisson and Fabien Dagnat",
title = "{ReCaml}: execution state as the cornerstone of
reconfigurations",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "27--38",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863550",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mazurak:2010:LCC,
author = "Karl Mazurak and Steve Zdancewic",
title = "{Lolliproc}: to concurrency from classical linear
logic via {Curry--Howard} and control",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "39--50",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863551",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{VanHorn:2010:AAM,
author = "David {Van Horn} and Matthew Might",
title = "Abstracting abstract machines",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "51--62",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863553",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Holdermans:2010:PFA,
author = "Stefan Holdermans and Jurriaan Hage",
title = "Polyvariant flow analysis with higher-ranked
polymorphic types and higher-order effect operators",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "63--74",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863554",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Naylor:2010:RR,
author = "Matthew Naylor and Colin Runciman",
title = "The {Reduceron} reconfigured",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "75--86",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863556",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The leading implementations of graph reduction all
target conventional processors designed for low-level
imperative execution. In this paper, we present a
processor specially designed to perform
graph reduction. Our processor, the Reduceron, is
implemented using off-the-shelf reconfigurable
hardware. We highlight the low-level parallelism
present in sequential graph reduction, and show how
parallel memories and dynamic analyses are used in the
Reduceron to achieve an average reduction rate of 0.55
function applications per clock-cycle.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Scott:2010:UFP,
author = "David Scott and Richard Sharp and Thomas Gazagnaire
and Anil Madhavapeddy",
title = "Using functional programming within an industrial
product group: perspectives and perceptions",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "87--92",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863557",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bergstrom:2010:LTS,
author = "Lars Bergstrom and Mike Rainey and John Reppy and Adam
Shaw and Matthew Fluet",
title = "Lazy tree splitting",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "93--104",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863558",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bierman:2010:SSS,
author = "Gavin M. Bierman and Andrew D. Gordon and Catalin
Hritcu and David Langworthy",
title = "Semantic subtyping with an {SMT} solver",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "105--116",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863560",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tobin-Hochstadt:2010:LTU,
author = "Sam Tobin-Hochstadt and Matthias Felleisen",
title = "Logical types for untyped languages",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "117--128",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863561",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Felleisen:2010:TC,
author = "Matthias Felleisen",
title = "{TeachScheme!}: a checkpoint",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "129--130",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863563",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Crary:2010:HOR,
author = "Karl Crary",
title = "Higher-order representation of substructural logics",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "131--142",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863565",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dreyer:2010:IHO,
author = "Derek Dreyer and Georg Neis and Lars Birkedal",
title = "The impact of higher-order state and control effects
on local relational reasoning",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "143--156",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863566",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Reed:2010:DMT,
author = "Jason Reed and Benjamin C. Pierce",
title = "Distance makes the types grow stronger: a calculus for
differential privacy",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "157--168",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863568",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Morgenstern:2010:STP,
author = "Jamie Morgenstern and Daniel R. Licata",
title = "Security-typed programming within dependently typed
programming",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "169--180",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Voigtlander:2010:CSS,
author = "Janis Voigtl{\"a}nder and Zhenjiang Hu and Kazutaka
Matsuda and Meng Wang",
title = "Combining syntactic and semantic
bidirectionalization",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "181--192",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863571",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Barbosa:2010:MLA,
author = "Davi M. J. Barbosa and Julien Cretin and Nate Foster
and Michael Greenberg and Benjamin C. Pierce",
title = "Matching lenses: alignment and view update",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "193--204",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863572",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hidaka:2010:BGT,
author = "Soichiro Hidaka and Zhenjiang Hu and Kazuhiro Inaba
and Hiroyuki Kato and Kazutaka Matsuda and Keisuke
Nakano",
title = "Bidirectionalizing graph transformations",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "205--216",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863573",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pouillard:2010:FLP,
author = "Nicolas Pouillard and Fran{\c{c}}ois Pottier",
title = "A fresh look at programming with names and binders",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "217--228",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863575",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Crestani:2010:ERG,
author = "Marcus Crestani and Michael Sperber",
title = "Experience report: growing programming languages for
beginning students",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "229--234",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863576",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Culpepper:2010:FM,
author = "Ryan Culpepper and Matthias Felleisen",
title = "Fortifying macros",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "235--246",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863577",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Blelloch:2010:FPA,
author = "Guy E. Blelloch",
title = "Functional parallel algorithms",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "247--248",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863579",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Arnold:2010:SVS,
author = "Gilad Arnold and Johannes H{\"o}lzl and Ali Sinan
K{\"o}ksal and Rastislav Bod{\'\i}k and Mooly Sagiv",
title = "Specifying and verifying sparse matrix codes",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "249--260",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863581",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Keller:2010:RSP,
author = "Gabriele Keller and Manuel M. T. Chakravarty and Roman
Leshchinskiy and Simon Peyton Jones and Ben Lippmeier",
title = "Regular, shape-polymorphic, parallel arrays in
{Haskell}",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "261--272",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863582",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{McCreight:2010:CFC,
author = "Andrew McCreight and Tim Chevalier and Andrew
Tolmach",
title = "A certified framework for compiling and executing
garbage-collected languages",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "273--284",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863584",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Danielsson:2010:TPC,
author = "Nils Anders Danielsson",
title = "Total parser combinators",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "285--296",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863585",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Brady:2010:SYI,
author = "Edwin C. Brady and Kevin Hammond",
title = "Scrapping your inefficient engine: using partial
evaluation to improve domain-specific language
implementation",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "297--308",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863587",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mitchell:2010:RS,
author = "Neil Mitchell",
title = "Rethinking supercompilation",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "309--320",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863588",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chargueraud:2010:PVT,
author = "Arthur Chargu{\'e}raud",
title = "Program verification through characteristic formulae",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "321--332",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863590",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Stampoulis:2010:VTC,
author = "Antonis Stampoulis and Zhong Shao",
title = "{VeriML}: typed computation of logical terms inside a
language with effects",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "333--344",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863591",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bernardy:2010:PDT,
author = "Jean-Philippe Bernardy and Patrik Jansson and Ross
Paterson",
title = "Parametricity and dependent types",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "345--356",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863592",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Fischer:2010:PRE,
author = "Sebastian Fischer and Frank Huch and Thomas Wilke",
title = "A play on regular expressions: functional pearl",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "357--368",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863594",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pop:2010:ERH,
author = "Iustin Pop",
title = "Experience report: {Haskell} as a reagent: results and
observations on the use of {Haskell} in a {Python}
project",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "369--374",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863595",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Morris:2010:ICT,
author = "J. Garrett Morris and Mark P. Jones",
title = "Instance chains: type class programming without
overlapping instances",
journal = j-SIGPLAN,
volume = "45",
number = "9",
pages = "375--386",
month = sep,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932681.1863596",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:43 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Forrest:2010:CES,
author = "Stephanie Forrest",
title = "The case for evolvable software",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "1--1",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pierce:2010:ASF,
author = "Benjamin C. Pierce",
title = "Art, science, and fear",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "2--2",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869540",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Syme:2010:FTS,
author = "Don Syme",
title = "{F\#}: Taking Succinct, Efficient, Typed Functional
Programming into the Mainstream",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "3--3",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1921682",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Stanley:2010:AOH,
author = "Kenneth O. Stanley",
title = "To achieve our highest goals, we must be willing to
abandon them",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "3--3",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869541",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Roberson:2010:EMG,
author = "Michael Roberson and Chandrasekhar Boyapati",
title = "Efficient modular glass box software model checking",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "4--21",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869461",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hanenberg:2010:EAS,
author = "Stefan Hanenberg",
title = "An experiment about static and dynamic type systems:
doubts about the positive impact of static type systems
on development time",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "22--35",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869462",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Itzhaky:2010:SIS,
author = "Shachar Itzhaky and Sumit Gulwani and Neil Immerman
and Mooly Sagiv",
title = "A simple inductive synthesis methodology and its
applications",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "36--46",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869463",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mercadal:2010:DSA,
author = "Julien Mercadal and Quentin Enard and Charles Consel
and Nicolas Loriant",
title = "A domain-specific approach to architecturing error
handling in pervasive computing",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "47--61",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869465",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Li:2010:GFR,
author = "Wei Li and Charles Zhang and Songlin Hu",
title = "{G-Finder}: routing programming questions closer to
the experts",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "62--73",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869466",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hoda:2010:AC,
author = "Rashina Hoda and Philippe Kruchten and James Noble and
Stuart Marshall",
title = "Agility in context",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "74--88",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869467",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Auerbach:2010:LJC,
author = "Joshua Auerbach and David F. Bacon and Perry Cheng and
Rodric Rabbah",
title = "{Lime}: a {Java}-compatible and synthesizable language
for heterogeneous architectures",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "89--108",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869469",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kou:2010:OFF,
author = "Stephen Kou and Jens Palsberg",
title = "From {OO} to {FPGA}: fitting round objects into square
hardware?",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "109--124",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869470",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tian:2010:ICP,
author = "Kai Tian and Yunlian Jiang and Eddy Z. Zhang and
Xipeng Shen",
title = "An input-centric paradigm for program dynamic
optimizations",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "125--139",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869471",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wood:2010:CSS,
author = "Benjamin P. Wood and Adrian Sampson and Luis Ceze and
Dan Grossman",
title = "Composable specifications for structured shared-memory
communication",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "140--159",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869473",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shi:2010:DUW,
author = "Yao Shi and Soyeon Park and Zuoning Yin and Shan Lu
and Yuanyuan Zhou and Wenguang Chen and Weimin Zheng",
title = "Do {I} use the wrong definition?: {DeFuse}:
definition-use invariants for detecting concurrency and
sequential bugs",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "160--174",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869474",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gabel:2010:SSD,
author = "Mark Gabel and Junfeng Yang and Yuan Yu and Moises
Goldszmidt and Zhendong Su",
title = "Scalable and systematic detection of buggy
inconsistencies in source code",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "175--190",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869475",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ogata:2010:SJN,
author = "Kazunori Ogata and Dai Mikurube and Kiyokuni Kawachiya
and Scott Trent and Tamiya Onodera",
title = "A study of {Java}'s non-{Java} memory",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "191--204",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869477",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{McIlroy:2010:HJR,
author = "Ross McIlroy and Joe Sventek",
title = "{Hera-JVM}: a runtime system for heterogeneous
multi-core architectures",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "205--222",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869478",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wegiel:2010:CLT,
author = "Michal Wegiel and Chandra Krintz",
title = "Cross-language, type-safe, and transparent object
sharing for co-located managed runtimes",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "223--240",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869479",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jin:2010:ISS,
author = "Guoliang Jin and Aditya Thakur and Ben Liblit and Shan
Lu",
title = "Instrumentation and sampling strategies for
cooperative concurrency bug isolation",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "241--255",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869481",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Reichenbach:2010:WCG,
author = "Christoph Reichenbach and Neil Immerman and Yannis
Smaragdakis and Edward E. Aftandilian and Samuel
Z. Guyer",
title = "What can the {GC} compute efficiently?: a language for
heap assertions at {GC} time",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "256--269",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869482",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Purandare:2010:MOS,
author = "Rahul Purandare and Matthew B. Dwyer and Sebastian
Elbaum",
title = "Monitor optimization via stutter-equivalent loop
transformation",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "270--285",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869483",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Schaefer:2010:SIR,
author = "Max Schaefer and Oege de Moor",
title = "Specifying and implementing refactorings",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "286--301",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869485",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Nguyen:2010:GBA,
author = "Hoan Anh Nguyen and Tung Thanh Nguyen and Gary
{Wilson, Jr.} and Anh Tuan Nguyen and Miryung Kim and
Tien N. Nguyen",
title = "A graph-based approach to {API} usage adaptation",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "302--321",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869486",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kell:2010:CAA,
author = "Stephen Kell",
title = "Component adaptation and assembly using interface
relations",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "322--340",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869487",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Oliveira:2010:TCO,
author = "Bruno C. d. S. Oliveira and Adriaan Moors and Martin
Odersky",
title = "Type classes as objects and implicits",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "341--360",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869489",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lerner:2010:SDT,
author = "Benjamin S. Lerner and Herman Venter and Dan
Grossman",
title = "Supporting dynamic, third-party code customizations in
{JavaScript} using aspects",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "361--376",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869490",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Herzeel:2010:DPR,
author = "Charlotte Herzeel and Pascal Costanza",
title = "Dynamic parallelization of recursive code: part 1:
managing control flow interactions with the
continuator",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "377--396",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869491",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dillig:2010:SHA,
author = "Isil Dillig and Thomas Dillig and Alex Aiken",
title = "Symbolic heap abstraction with demand-driven
axiomatization of memory invariants",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "397--410",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869493",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liang:2010:DEP,
author = "Percy Liang and Omer Tripp and Mayur Naik and Mooly
Sagiv",
title = "A dynamic evaluation of the precision of static heap
abstractions",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "411--427",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869494",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mendez-Lojo:2010:PIB,
author = "Mario M{\'e}ndez-Lojo and Augustine Mathew and Keshav
Pingali",
title = "Parallel inclusion-based points-to analysis",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "428--443",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869495",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kats:2010:SLW,
author = "Lennart C. L. Kats and Eelco Visser",
title = "The {Spoofax} language workbench: rules for
declarative specification of languages and {IDEs}",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "444--463",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869497",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Servetto:2010:MMC,
author = "Marco Servetto and Elena Zucca",
title = "{MetaFJig}: a meta-circular composition language for
{Java}-like classes",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "464--483",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869498",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Klose:2010:MLM,
author = "Karl Klose and Klaus Ostermann",
title = "Modular logic metaprogramming",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "484--503",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869499",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{vanStaden:2010:RAM,
author = "Stephan van Staden and Cristiano Calcagno",
title = "Reasoning about multiple related abstractions with
{MultiStar}",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "504--519",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869501",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Qi:2010:HFS,
author = "Xin Qi and Andrew C. Myers",
title = "Homogeneous family sharing",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "520--538",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869502",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chiba:2010:MMC,
author = "Shigeru Chiba and Atsushi Igarashi and Salikh
Zakirov",
title = "Mostly modular compilation of crosscutting concerns by
contextual predicate dispatch",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "539--554",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869503",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Klein:2010:RTH,
author = "Casey Klein and Matthew Flatt and Robert Bruce
Findler",
title = "Random testing for higher-order, stateful programs",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "555--566",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869505",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{McCarthy:2010:TSS,
author = "Jay A. McCarthy",
title = "The two-state solution: native and serializable
continuations accord",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "567--582",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869506",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Swaine:2010:BFI,
author = "James Swaine and Kevin Tew and Peter Dinda and Robert
Bruce Findler and Matthew Flatt",
title = "Back to the futures: incremental parallelization of
existing sequential runtime systems",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "583--597",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869507",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zibin:2010:OIG,
author = "Yoav Zibin and Alex Potanin and Paley Li and Mahmood
Ali and Michael D. Ernst",
title = "Ownership and immutability in generic {Java}",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "598--617",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869509",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cameron:2010:TO,
author = "Nicholas Cameron and James Noble and Tobias
Wrigstad",
title = "Tribal ownership",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "618--633",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869510",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Matsakis:2010:TAT,
author = "Nicholas D. Matsakis and Thomas R. Gross",
title = "A time-aware type system for data-race protection and
guaranteed initialization",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "634--651",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869511",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Upadhyaya:2010:AAR,
author = "Gautam Upadhyaya and Samuel P. Midkiff and Vijay S.
Pai",
title = "Automatic atomic region identification in shared
memory {SPMD} programs",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "652--670",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869513",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kulkarni:2010:TTP,
author = "Aditya Kulkarni and Yu David Liu and Scott F. Smith",
title = "Task types for pervasive atomicity",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "671--690",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869514",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Burckhardt:2010:CPR,
author = "Sebastian Burckhardt and Alexandro Baldassin and Daan
Leijen",
title = "Concurrent programming with revisions and isolation
types",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "691--707",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869515",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bebenita:2010:STB,
author = "Michael Bebenita and Florian Brandner and Manuel
F{\"a}hndrich and Francesco Logozzo and Wolfram Schulte and
Nikolai Tillmann and Herman Venter",
title = "{SPUR}: a trace-based {JIT} compiler for {CIL}",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "708--725",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869517",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kapur:2010:RRL,
author = "Puneet Kapur and Brad Cossette and Robert J. Walker",
title = "Refactoring references for library migration",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "726--738",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Altman:2010:PAI,
author = "Erik Altman and Matthew Arnold and Stephen Fink and
Nick Mitchell",
title = "Performance analysis of idle programs",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "739--753",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869519",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Davis:2010:RBL,
author = "Samuel Davis and Gregor Kiczales",
title = "Registration-based language abstractions",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "754--773",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869521",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Verwaest:2010:PBR,
author = "Toon Verwaest and Camillo Bruni and David Gurtner and
Adrian Lienhard and Oscar Nierstrasz",
title = "{Pinocchio}: bringing reflection to life with
first-class interpreters",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "774--789",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Rajan:2010:CMD,
author = "Hridesh Rajan and Steven M. Kautz and Wayne
Rowcliffe",
title = "Concurrency by modularity: design patterns, a case in
point",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "790--805",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869523",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Rinard:2010:PSA,
author = "Martin Rinard and Henry Hoffmann and Sasa Misailovic
and Stelios Sidiroglou",
title = "Patterns and statistical analysis for understanding
reduced resource computing",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "806--821",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869525",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sorensen:2010:PTC,
author = "Andrew Sorensen and Henry Gardner",
title = "Programming with time: cyber-physical programming with
{impromptu}",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "822--834",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869526",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chafi:2010:LVH,
author = "Hassan Chafi and Zach DeVito and Adriaan Moors and
Tiark Rompf and Arvind K. Sujeeth and Pat Hanrahan and
Martin Odersky and Kunle Olukotun",
title = "Language virtualization for heterogeneous parallel
computing",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "835--847",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869527",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ossher:2010:FMT,
author = "Harold Ossher and Rachel Bellamy and Ian Simmonds and
David Amid and Ateret Anaby-Tavor and Matthew Callery
and Michael Desmond and Jacqueline de Vries and Amit
Fisher and Sophia Krasikov",
title = "Flexible modeling tools for pre-requirements analysis:
conceptual architecture and research challenges",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "848--864",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869529",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dumitras:2010:UUI,
author = "Tudor Dumitras and Priya Narasimhan and Eli
Tilevich",
title = "To upgrade or not to upgrade: impact of online
upgrades across multiple administrative domains",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "865--876",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869530",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Arnold:2010:MAP,
author = "Kenneth C. Arnold and Henry Lieberman",
title = "Managing ambiguity in programming by finding
unambiguous examples",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "877--884",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gabriel:2010:BST,
author = "Richard P. Gabriel and Kevin J. Sullivan",
title = "Better science through art",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "885--900",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869533",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Quillien:2010:RDN,
author = "Jenny Quillien and Dave West",
title = "Rubber ducks, nightmares, and unsaturated predicates:
proto-scientific schemata are good for agile",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "901--917",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kats:2010:PDS,
author = "Lennart C. L. Kats and Eelco Visser and Guido
Wachsmuth",
title = "Pure and declarative syntax definition: paradise lost
and regained",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "918--932",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869535",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hanenberg:2010:FHL,
author = "Stefan Hanenberg",
title = "Faith, hope, and love: an essay on software science's
neglect of human factors",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "933--946",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869536",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Adamczyk:2010:TBD,
author = "Paul Adamczyk and Munawar Hafiz",
title = "The {Tower of Babel} did not fail",
journal = j-SIGPLAN,
volume = "45",
number = "10",
pages = "947--957",
month = oct,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1932682.1869537",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:13:46 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Rendel:2010:ISD,
author = "Tillmann Rendel and Klaus Ostermann",
title = "Invertible syntax descriptions: unifying parsing and
pretty printing",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "1--12",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863525",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Straka:2010:PHC,
author = "Milan Straka",
title = "The performance of the {Haskell} containers package",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "13--24",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863526",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Pirog:2010:SDS,
author = "Maciej Pirog and Dariusz Biernacki",
title = "A systematic derivation of the {STG} machine verified
in {Coq}",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "25--36",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863528",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Magalhaes:2010:GDM,
author = "Jos{\'e} Pedro Magalh{\~a}es and Atze Dijkstra and
Johan Jeuring and Andres L{\"o}h",
title = "A generic deriving mechanism for {Haskell}",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "37--48",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863529",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{vanGroningen:2010:ESB,
author = "John van Groningen and Thomas van Noort and Peter
Achten and Pieter Koopman and Rinus Plasmeijer",
title = "Exchanging sources between {Clean} and {Haskell}: a
double-edged front end for the {Clean} compiler",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "49--60",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863530",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The functional programming languages Clean and Haskell
have been around for over two decades. Over time, both
languages have developed a large body of useful
libraries and come with interesting language features.
It is our primary goal to benefit from each other's
evolutionary results by facilitating the exchange of
sources between Clean and Haskell and study the
forthcoming interactions between their distinct
language features. This is achieved by using the
existing Clean compiler as starting point, and
implementing a double-edged front end for this
compiler: it supports both standard Clean 2.1 and
(currently a large part of) standard Haskell 98.
Moreover, it allows both languages to seamlessly use
many of each other's language features that were alien
to each other before. For instance, Haskell can now use
uniqueness typing anywhere, and Clean can use newtypes
efficiently. This has given birth to two new dialects
of Clean and Haskell, dubbed Clean* and Haskell*.
Additionally, measurements of the performance of the
new compiler indicate that it is on par with the
flagship Haskell compiler GHC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Morris:2010:ERU,
author = "J. Garrett Morris",
title = "Experience report: using hackage to inform language
design",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "61--66",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Mainland:2010:NEC,
author = "Geoffrey Mainland and Greg Morrisett",
title = "{Nikola}: embedding compiled {GPU} functions in
{Haskell}",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "67--78",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863533",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Launchbury:2010:COH,
author = "John Launchbury and Trevor Elliott",
title = "Concurrent orchestration in {Haskell}",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "79--90",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Marlow:2010:SNM,
author = "Simon Marlow and Patrick Maier and Hans-Wolfgang Loidl
and Mustafa K. Aswad and Phil Trinder",
title = "Seq no more: better strategies for parallel
{Haskell}",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "91--102",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863535",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{OSullivan:2010:SEH,
author = "Bryan O'Sullivan and Johan Tibell",
title = "Scalable {I/O} event handling for {GHC}",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "103--108",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863536",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Terei:2010:LBG,
author = "David A. Terei and Manuel M. T. Chakravarty",
title = "An {{\tt llvm}} backend for {GHC}",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "109--120",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863538",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Ramsey:2010:HMR,
author = "Norman Ramsey and Jo{\~a}o Dias and Simon Peyton
Jones",
title = "{Hoopl}: a modular, reusable library for dataflow
analysis and transformation",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "121--134",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Bolingbroke:2010:SE,
author = "Maximilian Bolingbroke and Simon Peyton Jones",
title = "Supercompilation by evaluation",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "135--146",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863540",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Yorgey:2010:SFT,
author = "Brent A. Yorgey",
title = "Species and functors and types, oh my!",
journal = j-SIGPLAN,
volume = "45",
number = "11",
pages = "147--158",
month = nov,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2088456.1863542",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:45 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "HASKELL '10 conference proceedings.",
}
@Article{Brunthaler:2010:EIU,
author = "Stefan Brunthaler",
title = "Efficient interpretation using quickening",
journal = j-SIGPLAN,
volume = "45",
number = "12",
pages = "1--14",
month = dec,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1899661.1869633",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Dec 15 10:25:15 MST 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Just-in-time compilers offer the biggest achievable
payoff performance-wise, but their implementation is a
non-trivial, time-consuming task that also affects the
interpreter's maintenance for years to come.
Recent research addresses this issue by providing ways
of leveraging existing just-in-time compilation
infrastructures. Though there has been considerable
research on improving the efficiency of just-in-time
compilers, the area of optimizing interpreters has
received less attention, as if the implementation of a
dynamic translation system were the ``ultima ratio'' for
efficiently interpreting programming languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
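
The quickening technique named in this abstract is easy to convey in
miniature. The sketch below (plain Python; all names are hypothetical,
not taken from the paper) shows a generic add instruction rewriting its
own code slot into a type-specialized variant after its first
execution, so later executions of the same slot skip the type dispatch.

def op_add_generic(vm, pc):
    a, b = vm.stack.pop(), vm.stack.pop()
    if isinstance(a, int) and isinstance(b, int):
        vm.code[pc] = op_add_int       # quicken: rewrite this slot
    vm.stack.append(b + a)

def op_add_int(vm, pc):                # specialized: no type checks
    a = vm.stack.pop()
    vm.stack.append(vm.stack.pop() + a)

class VM:
    def __init__(self, code, stack):
        self.code, self.stack = list(code), list(stack)

    def run(self):
        for pc, op in enumerate(self.code):
            op(self, pc)
        return self.stack

vm = VM([op_add_generic], [1, 2])
print(vm.run())                        # [3]
print(vm.code[0] is op_add_int)        # True: the slot was quickened
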
@Article{Zakirov:2010:ODD,
author = "Salikh S. Zakirov and Shigeru Chiba and Etsuya
Shibayama",
title = "Optimizing dynamic dispatch with fine-grained state
tracking",
journal = j-SIGPLAN,
volume = "45",
number = "12",
pages = "15--26",
month = dec,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1899661.1869634",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Dec 15 10:25:15 MST 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic mixin is a construct available in Ruby and
other dynamic languages. It can be used as a base to
implement a range of programming paradigms, such as
dynamic aspect-oriented programming and
context-oriented programming. However, the performance
characteristics of the current implementation of
dynamic mixin in Ruby leave much to be desired: under
conditions of frequent dynamic mixin operations, global
method cache and inline cache misses incur significant
overhead. In this work we implemented fine-grained
state tracking for CRuby 1.9 and were able to improve
performance by more than six times on a microbenchmark
exercising the extreme case, with a factor of 4
attributable to eliminating global method cache
clearing, 28\% to fine-grained state tracking, and a
further 12\% to inline cache miss elimination by
caching alternating states.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
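
The fine-grained state tracking described above can be pictured as an
inline cache guarded by a per-class state counter: installing a dynamic
mixin bumps only the affected class's counter, so cached lookups for
other classes stay valid. A minimal sketch with hypothetical names, not
CRuby's implementation:

class Klass:
    def __init__(self, methods):
        self.methods, self.state = dict(methods), 0

    def mixin(self, methods):          # dynamic mixin installation
        self.methods.update(methods)
        self.state += 1                # fine-grained invalidation

class CallSite:
    def __init__(self, name):
        self.name, self.cache = name, None

    def invoke(self, klass, *args):
        c = self.cache
        if c and c[0] is klass and c[1] == klass.state:
            return c[2](*args)         # inline cache hit
        method = klass.methods[self.name]    # slow path: full lookup
        self.cache = (klass, klass.state, method)
        return method(*args)

a = Klass({"f": lambda: "base"})
site = CallSite("f")
print(site.invoke(a))                  # slow path, fills the cache
print(site.invoke(a))                  # cache hit
a.mixin({"f": lambda: "mixed in"})     # bumps only a.state
print(site.invoke(a))                  # miss, re-resolves to the mixin
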
@Article{Gorbovitski:2010:AAO,
author = "Michael Gorbovitski and Yanhong A. Liu and Scott D.
Stoller and Tom Rothamel and Tuncay K. Tekle",
title = "Alias analysis for optimization of dynamic languages",
journal = j-SIGPLAN,
volume = "45",
number = "12",
pages = "27--42",
month = dec,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1899661.1869635",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Dec 15 10:25:15 MST 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic languages such as Python allow programs to be
written more easily using high-level constructs such as
comprehensions for queries and using generic code.
Efficient execution of programs then requires powerful
optimizations: incrementalization of expensive queries
and specialization of generic code. Effective
incrementalization and specialization of dynamic
languages require precise and scalable alias analysis.
This paper describes the development and experimental
evaluation of a may-alias analysis for a full dynamic
object-oriented language, for program optimization by
incrementalization and specialization. The analysis is
flow-sensitive; we show that this is necessary for
effective optimization of dynamic languages. It uses
precise type analysis and a powerful form of context
sensitivity, called trace sensitivity, to further
improve analysis precision.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
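
A toy example makes the abstract's claim about flow-sensitivity
concrete. In the sketch below (hypothetical and far simpler than the
paper's analysis), per-statement points-to sets with strong updates let
the analysis prove that two variables no longer alias after a
reassignment, which a flow-insensitive analysis would miss.

def analyze(stmts):
    pts = {}                                    # var -> abstract objects
    for lhs, rhs in stmts:                      # statement: lhs = rhs
        if rhs.startswith("new"):
            pts[lhs] = {rhs}                    # fresh allocation site
        else:
            pts[lhs] = set(pts.get(rhs, set()))  # copy: lhs aliases rhs
    return pts

def may_alias(pts, x, y):
    return bool(pts.get(x, set()) & pts.get(y, set()))

pts = analyze([("a", "new1"), ("b", "a"), ("a", "new2")])
print(may_alias(pts, "a", "b"))    # False: the strong update of a is seen
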
@Article{Pestov:2010:FDS,
author = "Sviatoslav Pestov and Daniel Ehrenberg and Joe
Groff",
title = "{Factor}: a dynamic stack-based programming language",
journal = j-SIGPLAN,
volume = "45",
number = "12",
pages = "43--58",
month = dec,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1899661.1869637",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Dec 15 10:25:15 MST 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Factor is a new dynamic object-oriented programming
language. It began as an embedded scripting language
and evolved to a mature application development
language. The language has a simple execution model and
is based on the manipulation of data on a stack. An
advanced metaprogramming system provides means for
easily extending the language. Thus, Factor allows
programmers to use the right features for their problem
domain. The Factor implementation is self-hosting,
featuring an interactive development environment and an
optimizing compiler. In this paper, the language and
its implementation are presented.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
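
The stack-based execution model the abstract describes fits in a few
lines. The following is an illustrative interpreter in Python with a
hypothetical word set, not Factor's implementation: a program is a
sequence of literals, which are pushed, and words, which manipulate the
data stack.

def run(program, words, stack=None):
    stack = [] if stack is None else stack
    for token in program:
        if callable(words.get(token)):
            words[token](stack)        # word: operate on the stack
        else:
            stack.append(token)        # literal: push
    return stack

words = {
    "dup": lambda s: s.append(s[-1]),
    "*":   lambda s: s.append(s.pop() * s.pop()),
    "+":   lambda s: s.append(s.pop() + s.pop()),
}

print(run([3, "dup", "*", 4, "dup", "*", "+"], words))   # [25]
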
@Article{VanCutsem:2010:PDP,
author = "Tom {Van Cutsem} and Mark S. Miller",
title = "Proxies: design principles for robust object-oriented
intercession {APIs}",
journal = j-SIGPLAN,
volume = "45",
number = "12",
pages = "59--72",
month = dec,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1899661.1869638",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Dec 15 10:25:15 MST 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Proxies are a powerful approach to implement
meta-objects in object-oriented languages without
having to resort to metacircular interpretation. We
introduce such a meta-level API based on proxies for
JavaScript. We simultaneously introduce a set of design
principles that characterize such APIs in general, and
compare similar APIs of other languages in terms of
these principles. We highlight how principled
proxy-based APIs improve code robustness by avoiding
interference between base and meta-level code that
occurs in more common reflective intercession
mechanisms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
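
A rough Python analogue of the handler-based design may help; the
paper's API targets JavaScript and its trap set is much richer. Here a
proxy routes property access through a handler, which can observe the
operation before delegating to the wrapped target, keeping base-level
and meta-level code apart.

class LoggingHandler:
    def __init__(self, target):
        self.target = target

    def get(self, name):               # the "get" trap
        print(f"trap: get {name!r}")
        return getattr(self.target, name)

class Proxy:
    def __init__(self, handler):
        object.__setattr__(self, "_handler", handler)

    def __getattr__(self, name):       # only called on attribute misses
        return self._handler.get(name)

p = Proxy(LoggingHandler([1, 2, 3]))
print(p.count(2))                      # trap fires, then delegates
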
@Article{Tratt:2010:EIL,
author = "Laurence Tratt",
title = "Experiences with an {Icon3}-like expression evaluation
system",
journal = j-SIGPLAN,
volume = "45",
number = "12",
pages = "73--80",
month = dec,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1899661.1869640",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Dec 15 10:25:15 MST 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The design of the Icon programming language's
expression evaluation system, which can perform limited
backtracking, was unique amongst imperative programming
languages when created. In this paper I explain and
critique the original Icon design and show how a
similar system can be integrated into a modern
dynamically typed language. Finally I detail my
experiences of this system and offer suggestions for
the lessons to be learned from it.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
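
Icon-style goal-directed evaluation maps naturally onto generators,
which is close in spirit to the integration the paper discusses. In
this analogy (not the paper's design), each loop is a choice point,
failure simply falls through, and the caller can resume the computation
to obtain further results.

def upto(n):                      # Icon's '1 to n': a stream of results
    yield from range(1, n + 1)

def solve():
    for x in upto(12):            # choice point; exhausting it backtracks
        for y in upto(12):
            if x * y == 12 and x < y:
                yield (x, y)      # succeed; resumable for more answers

print(list(solve()))              # [(1, 12), (2, 6), (3, 4)]
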
@Article{Axelsen:2010:CDM,
author = "Eyvind W. Axelsen and Stein Krogdahl and Birger
M{\o}ller-Pedersen",
title = "Controlling dynamic module composition through an
extensible meta-level {API}",
journal = j-SIGPLAN,
volume = "45",
number = "12",
pages = "81--96",
month = dec,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1899661.1869641",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Dec 15 10:25:15 MST 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In addition to traditional object-oriented (OO)
concepts such as inheritance and polymorphism, several
modularization and composition mechanisms such as
traits, mixins and virtual classes have emerged. The
Package Template mechanism is another attempt at
providing a flexible mechanism for modularization,
composition and adaptation. Dynamic languages have
traditionally employed strong support for
meta-programming, with hooks to control OO concepts
such as method invocation and object construction, by
utilizing meta-classes and meta-object protocols. In
this work, we attempt to bring a corresponding degree
of meta-level control to composition primitives, with a
concrete starting point in the package template
mechanism as developed for the dynamic language
Groovy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
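
The kind of meta-level control over composition the abstract aims for
can be hinted at in Python (a hypothetical sketch, not the Groovy
package-template mechanism): the composition primitive consults a
user-supplied policy whenever a template member collides with an
existing one.

def compose(cls, template, policy=lambda name, old, new: new):
    for name, member in template.items():
        old = getattr(cls, name, None)
        setattr(cls, name, policy(name, old, member) if old else member)
    return cls

class Greeter:
    def greet(self): return "hi"

keep_old = lambda name, old, new: old      # meta-level conflict policy

compose(Greeter, {"greet": lambda self: "hello",
                  "bye": lambda self: "bye"}, policy=keep_old)
g = Greeter()
print(g.greet(), g.bye())                  # hi bye
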
@Article{Strickland:2010:CFC,
author = "T. Stephen Strickland and Matthias Felleisen",
title = "Contracts for first-class classes",
journal = j-SIGPLAN,
volume = "45",
number = "12",
pages = "97--112",
month = dec,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1899661.1869642",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Dec 15 10:25:15 MST 2010",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "First-class classes add expressive power to
class-based object-oriented languages. Most
importantly, programmers can abstract over common
scenarios with first-class classes. When it comes to
behavioral software contracts, however, first-class
classes pose significant challenges. In this paper, we
present the first contract system for a programming
language with first-class classes. The design has been
implemented for Racket, which supports first-class
classes and which implements mixins and traits as
syntactic sugar. We expect that our experience also
applies to languages with native mixins and/or
traits.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
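
Because classes in Racket are first-class values, a contract can wrap
the class value itself rather than an instance. The following Python
sketch (hypothetical API, not the paper's combinators) conveys the
idea: wrapping a class yields a subclass in which one method's pre- and
postcondition are checked on every call.

def class_contract(cls, method, pre, post):
    original = getattr(cls, method)

    def checked(self, *args):
        assert pre(*args), f"precondition of {method} violated"
        result = original(self, *args)
        assert post(result), f"postcondition of {method} violated"
        return result

    return type(cls.__name__ + "Contracted", (cls,), {method: checked})

class Account:
    def __init__(self): self.balance = 0
    def deposit(self, amount):
        self.balance += amount
        return self.balance

SafeAccount = class_contract(Account, "deposit",
                             pre=lambda amount: amount > 0,
                             post=lambda balance: balance >= 0)

print(SafeAccount().deposit(10))    # 10
try:
    SafeAccount().deposit(-5)
except AssertionError as e:
    print(e)                        # precondition of deposit violated
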
@Article{Leroy:2011:VSD,
author = "Xavier Leroy",
title = "Verified squared: does critical software deserve
verified tools?",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "1--2",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926387",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lhotak:2011:PAE,
author = "Ondrej Lhot{\'a}k and Kwok-Chiang Andrew Chung",
title = "Points-to analysis with efficient strong updates",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "3--16",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926389",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Smaragdakis:2011:PYC,
author = "Yannis Smaragdakis and Martin Bravenboer and Ondrej
Lhot{\'a}k",
title = "Pick your contexts well: understanding
object-sensitivity",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "17--30",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926390",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liang:2011:LMA,
author = "Percy Liang and Omer Tripp and Mayur Naik",
title = "Learning minimal abstractions",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "31--42",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926391",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sevcik:2011:RMC,
author = "Jaroslav {\v{S}}ev{\c{c}}ik and Viktor Vafeiadis and
Francesco Zappa Nardelli and Suresh Jagannathan and
Peter Sewell",
title = "Relaxed-memory concurrency and verified compilation",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "43--54",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926393",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Batty:2011:MCC,
author = "Mark Batty and Scott Owens and Susmit Sarkar and Peter
Sewell and Tjark Weber",
title = "Mathematizing {C++} concurrency",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "55--66",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926394",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ramananandro:2011:FVO,
author = "Tahina Ramananandro and Gabriel {Dos Reis} and Xavier
Leroy",
title = "Formal verification of object layout for {C++}
multiple inheritance",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "67--80",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926395",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Choi:2011:SAM,
author = "Wontae Choi and Baris Aktemur and Kwangkeun Yi and
Makoto Tatsuta",
title = "Static analysis of multi-staged programs via unstaging
translation",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "81--92",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926397",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Schwarz:2011:SAI,
author = "Martin D. Schwarz and Helmut Seidl and Vesal Vojdani
and Peter Lammich and Markus M{\"u}ller-Olm",
title = "Static analysis of interrupt-driven programs
synchronized via the priority ceiling protocol",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "93--104",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926398",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cousot:2011:PSF,
author = "Patrick Cousot and Radhia Cousot and Francesco
Logozzo",
title = "A parametric segmentation functor for fully automatic
and scalable array content analysis",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "105--118",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926399",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Birkedal:2011:SIK,
author = "Lars Birkedal and Bernhard Reus and Jan Schwinghammer
and Kristian St{\o}vring and Jacob Thamsborg and
Hongseok Yang",
title = "Step-indexed {Kripke} models over recursive worlds",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "119--132",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926401",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hur:2011:KLR,
author = "Chung-Kil Hur and Derek Dreyer",
title = "A {Kripke} logical relation between {ML} and
assembly",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "133--146",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926402",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pottier:2011:TSP,
author = "Fran{\c{c}}ois Pottier",
title = "A typed store-passing translation for general
references",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "147--158",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926403",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Prountzos:2011:SAO,
author = "Dimitrios Prountzos and Roman Manevich and Keshav
Pingali and Kathryn S. McKinley",
title = "A shape analysis for optimizing parallel graph
programs",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "159--172",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926405",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Rival:2011:CCA,
author = "Xavier Rival and Bor-Yuh Evan Chang",
title = "Calling context abstraction with shapes",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "173--186",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926406",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dillig:2011:PRP,
author = "Isil Dillig and Thomas Dillig and Alex Aiken",
title = "Precise reasoning for programs using containers",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "187--200",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926407",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ahmed:2011:BA,
author = "Amal Ahmed and Robert Bruce Findler and Jeremy G. Siek
and Philip Wadler",
title = "Blame for all",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "201--214",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926409",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dimoulas:2011:CBC,
author = "Christos Dimoulas and Robert Bruce Findler and Cormac
Flanagan and Matthias Felleisen",
title = "Correct blame for contracts: no more scapegoating",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "215--226",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926410",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Weirich:2011:GTA,
author = "Stephanie Weirich and Dimitrios Vytiniotis and Simon
Peyton Jones and Steve Zdancewic",
title = "Generative type abstraction and type-level
computation",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "227--240",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926411",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{MacLaurin:2011:DKT,
author = "Matthew B. MacLaurin",
title = "The design of {Kodu}: a tiny visual programming
language for children on the {Xbox 360}",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "241--246",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926413",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Turon:2011:SLR,
author = "Aaron Joseph Turon and Mitchell Wand",
title = "A separation logic for refining concurrent objects",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "247--258",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926415",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dodds:2011:MRD,
author = "Mike Dodds and Suresh Jagannathan and Matthew J.
Parkinson",
title = "Modular reasoning for deterministic parallelism",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "259--270",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926416",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jacobs:2011:EMF,
author = "Bart Jacobs and Frank Piessens",
title = "Expressive modular fine-grained concurrency
specification",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "271--282",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926417",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Madhusudan:2011:TWA,
author = "P. Madhusudan and Gennaro Parlato",
title = "The tree width of auxiliary storage",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "283--294",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926419",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tzevelekos:2011:FRA,
author = "Nikos Tzevelekos",
title = "Fresh-register automata",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "295--306",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926420",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Leroux:2011:VAS,
author = "J{\'e}r{\^o}me Leroux",
title = "Vector addition system reachability problem: a short
self-contained proof",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "307--316",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926421",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gulwani:2011:ASP,
author = "Sumit Gulwani",
title = "Automating string processing in spreadsheets using
input-output examples",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "317--330",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926423",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gupta:2011:PAR,
author = "Ashutosh Gupta and Corneliu Popeea and Andrey
Rybalchenko",
title = "Predicate abstraction and refinement for verifying
multi-threaded programs",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "331--344",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926424",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ghica:2011:GSIa,
author = "Dan R. Ghica and Alex Smith",
title = "Geometry of synthesis {III}: resource management
through type inference",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "345--356",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926425",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hoffmann:2011:MAR,
author = "Jan Hoffmann and Klaus Aehlig and Martin Hofmann",
title = "Multivariate amortized resource analysis",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "357--370",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926427",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hofmann:2011:SL,
author = "Martin Hofmann and Benjamin Pierce and Daniel Wagner",
title = "Symmetric lenses",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "371--384",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926428",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Henglein:2011:REC,
author = "Fritz Henglein and Lasse Nielsen",
title = "Regular expression containment: coinductive
axiomatization and computational interpretation",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "385--398",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926429",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cook:2011:MPD,
author = "Byron Cook and Eric Koskinen",
title = "Making prophecies with decision predicates",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "399--410",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926431",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Emmi:2011:DBS,
author = "Michael Emmi and Shaz Qadeer and Zvonimir
Rakamari{\'c}",
title = "Delay-bounded scheduling",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "411--422",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926432",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sinha:2011:IA,
author = "Nishant Sinha and Chao Wang",
title = "On interference abstractions",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "423--434",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926433",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Denielou:2011:DMS,
author = "Pierre-Malo Deni{\'e}lou and Nobuko Yoshida",
title = "Dynamic multirole session types",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "435--446",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926435",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tov:2011:PAT,
author = "Jesse A. Tov and Riccardo Pucella",
title = "Practical affine types",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "447--458",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926436",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{An:2011:DIS,
author = "Jong-hoon (David) An and Avik Chaudhuri and Jeffrey S.
Foster and Michael Hicks",
title = "Dynamic inference of static types for {\tt ruby}",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "459--472",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926437",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gordon:2011:RMV,
author = "Andrew D. Gordon and Robert Harper and John Harrison
and Alan Jeffrey and Peter Sewell",
title = "{Robin Milner 1934--2010}: verification, languages,
and concurrency",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "473--474",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926439",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bendersky:2011:SOB,
author = "Anna Bendersky and Erez Petrank",
title = "Space overhead bounds for dynamic memory management
with partial compaction",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "475--486",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926441",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Attiya:2011:LOE,
author = "Hagit Attiya and Rachid Guerraoui and Danny Hendler
and Petr Kuznetsov and Maged M. Michael and Martin
Vechev",
title = "Laws of order: expensive synchronization in concurrent
algorithms cannot be eliminated",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "487--498",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926442",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Esparza:2011:CPB,
author = "Javier Esparza and Pierre Ganty",
title = "Complexity of pattern-based verification for
multithreaded programs",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "499--510",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926443",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Prabhu:2011:EAF,
author = "Tarun Prabhu and Shreyas Ramalingam and Matthew Might
and Mary Hall",
title = "{EigenCFA}: accelerating flow analysis with {GPUs}",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "511--522",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926445",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Feng:2011:BQP,
author = "Yuan Feng and Runyao Duan and Mingsheng Ying",
title = "Bisimulation for quantum processes",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "523--534",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926446",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bocchino:2011:SND,
author = "Robert L. {Bocchino, Jr.} and Stephen Heumann and Nima
Honarmand and Sarita V. Adve and Vikram S. Adve and
Adam Welc and Tatiana Shpeisman",
title = "Safe nondeterminism in a deterministic-by-default
parallel language",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "535--548",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926447",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pouchet:2011:LTC,
author = "Louis-No{\"e}l Pouchet and Uday Bondhugula and
C{\'e}dric Bastoul and Albert Cohen and J. Ramanujam
and P. Sadayappan and Nicolas Vasilache",
title = "Loop transformations: convexity, pruning and
optimization",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "549--562",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926449",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Guo:2011:ECT,
author = "Shu-yu Guo and Jens Palsberg",
title = "The essence of compiling with traces",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "563--574",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926450",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ramsey:2011:RRM,
author = "Norman Ramsey and Jo{\~a}o Dias",
title = "Resourceable, retargetable, modular instruction
selection using a machine-independent, type-based
tiling of low-level intermediate code",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "575--586",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926451",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ong:2011:VHO,
author = "C.-H. Luke Ong and Steven James Ramsay",
title = "Verifying higher-order functional programs with
pattern-matching algebraic data types",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "587--598",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926453",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Alur:2011:STA,
author = "Rajeev Alur and Pavol Cern{\'y}",
title = "Streaming transducers for algorithmic verification of
single-pass list-processing programs",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "599--610",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926454",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Madhusudan:2011:DLC,
author = "P. Madhusudan and Gennaro Parlato and Xiaokang Qiu",
title = "Decidable logics combining heap structures and data",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "611--622",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926455",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Joisha:2011:TEA,
author = "Pramod G. Joisha and Robert S. Schreiber and
Prithviraj Banerjee and Hans J. Boehm and Dhruva R.
Chakrabarti",
title = "A technique for the effective and automatic reuse of
classical compiler optimizations on multithreaded
code",
journal = j-SIGPLAN,
volume = "46",
number = "1",
pages = "623--636",
month = jan,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1925844.1926457",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jan 26 15:06:39 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lammel:2011:HGS,
author = "Ralf L{\"a}mmel",
title = "The hitchhiker's guide to software languages",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "1--2",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868295",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There is only that much space in the CS curriculum,
and there are always new subjects that should be
accommodated by the curriculum. For instance, in our
community, we would want all graduates to leave
university with a modest background in technical
spaces, software languages, and meta-programming; also,
with conceptually informed and reasonably timeless
skills to efficiently master related programming
techniques and technologies. In reality, the curricula
of few CS departments meet this expectation. In this
talk, I will discuss such curricula-related
expectations of our community and the suboptimal
situation at CS departments---as I perceive them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Erwig:2011:LSV,
author = "Martin Erwig",
title = "A language for software variation research",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "3--12",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868296",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Managing variation is an important problem in software
engineering that takes different forms, ranging from
version control and configuration management to
software product lines. In this paper, I present our
recent work on the choice calculus, a fundamental
representation for software variation that can serve as
a common language of discourse for variation research,
filling a role similar to lambda calculus in
programming language research. After motivating the
design of the choice calculus and sketching its
semantics, I will discuss several potential application
areas.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
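
A minimal encoding suggests how the choice calculus represents
variation: a choice node names a dimension and holds alternatives, and
selecting one alternative per dimension projects an ordinary tree. The
sketch below is illustrative Python, not the calculus's formal syntax.

class Chc:                             # a choice in a named dimension
    def __init__(self, dim, *alts):
        self.dim, self.alts = dim, alts

def select(tree, choice):              # choice: dimension -> index
    if isinstance(tree, Chc):
        return select(tree.alts[choice[tree.dim]], choice)
    if isinstance(tree, tuple):        # plain structure: recurse
        return tuple(select(t, choice) for t in tree)
    return tree

expr = ("twice", Chc("Impl", ("*", "x", 2), ("+", "x", "x")))
print(select(expr, {"Impl": 0}))       # ('twice', ('*', 'x', 2))
print(select(expr, {"Impl": 1}))       # ('twice', ('+', 'x', 'x'))
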
@Article{Clarke:2011:ADM,
author = "Dave Clarke and Michiel Helvensteijn and Ina
Schaefer",
title = "Abstract delta modeling",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "13--22",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868298",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Delta modeling is an approach to facilitate automated
product derivation for software product lines. It is
based on a set of deltas specifying modifications that
are incrementally applied to a core product. The
applicability of deltas depends on feature-dependent
conditions. This paper presents abstract delta
modeling, which explores delta modeling from an
abstract, algebraic perspective. Compared to previous
work, we take a more flexible approach with respect to
conflicts between modifications and introduce the
notion of conflict-resolving deltas. We present
conditions on the structure of deltas to ensure
unambiguous product generation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
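
The core/delta structure can be sketched directly: a product is derived
by applying, in a fixed order, each delta whose feature condition
holds, and the ordering is one simple way to resolve conflicting
modifications. All names below are hypothetical.

core = {"engine": "petrol"}

deltas = [
    (lambda f: "electric" in f,
     lambda p: {**p, "engine": "electric", "battery": "40kWh"}),
    (lambda f: "sport" in f,
     lambda p: {**p, "suspension": "stiff"}),
]

def derive(features):
    product = dict(core)
    for applicable, modify in deltas:   # fixed application order
        if applicable(features):
            product = modify(product)
    return product

print(derive({"electric", "sport"}))
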
@Article{Ryssel:2011:AVP,
author = "Uwe Ryssel and Joern Ploennigs and Klaus Kabitzsch",
title = "Automatic variation-point identification in
function-block-based models",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "23--32",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868299",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Function-block-based modeling is often used to develop
embedded systems, particularly as system variants can
be developed rapidly from existing modules. Generative
approaches can simplify the handling and development of
the resulting high variety of function-block-based
models. But they often require the development of new
generic models that do not utilize existing ones.
Reusing existing models will significantly decrease the
effort to apply generative programming. This work
introduces an automatic approach to recognize variants
in a set of models and identify the variation points
and their dependencies within variants.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sincero:2011:EEA,
author = "Julio Sincero and Reinhard Tartler and Daniel Lohmann
and Wolfgang Schr{\"o}der-Preikschat",
title = "Efficient extraction and analysis of
preprocessor-based variability",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "33--42",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868300",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The C Preprocessor (CPP) is the tool of choice for the
implementation of variability in many large-scale
configurable software projects. Linux, probably the
most-configurable piece of software ever, employs more
than 10,000 preprocessor variables for this purpose.
However, this de-facto variability tends to be ``hidden
in the code''; which on the long term leads to
variability defects, such as dead code or
inconsistencies with respect to the intended (modeled)
variability of the software. This calls for tool
support for the efficient extraction of (and reasoning
over) CPP-based variability. We suggest a novel
approach to extract CPP-based variability. Our tool
transforms CPP-based variability in O(n) complexity
into a propositional formula that ``mimics'' all valid
effects of conditional compilation and can be analyzed
with standard SAT or BDD packages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
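
The extraction step has a natural one-pass shape, consistent with the
O(n) claim: walk the source once, maintain a stack of preprocessor
conditions, and pair every code line with the conjunction of the
conditions in scope. The sketch below builds presence conditions as
strings, whereas the actual tool feeds a SAT or BDD package.

def presence_conditions(lines):
    stack, out = [], []
    for line in lines:
        s = line.strip()
        if s.startswith("#ifdef"):
            stack.append(s.split()[1])
        elif s.startswith("#else"):
            stack.append("!" + stack.pop())
        elif s.startswith("#endif"):
            stack.pop()
        else:
            out.append((line, " & ".join(stack) or "True"))
    return out

src = ["#ifdef NET", "init_net();", "#ifdef IPV6", "init_v6();",
       "#else", "init_v4();", "#endif", "#endif"]
for line, cond in presence_conditions(src):
    print(f"{line:12} [{cond}]")       # e.g. init_v4();  [NET & !IPV6]
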
@Article{Middelkoop:2011:ITI,
author = "Arie Middelkoop and Atze Dijkstra and S. Doaitse
Swierstra",
title = "Iterative type inference with attribute grammars",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "43--52",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868302",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Type inference is the process of constructing a typing
derivation while gradually discovering type
information. During this process, inference algorithms
typically make subtle decisions based on the derivation
constructed so far. Because a typing derivation is a
decorated tree, we aim to use attribute grammars as the
main implementation tool. Unfortunately, we can neither
express iteration, nor express decisions based on
intermediate derivations in such grammars. We present
the language ruler-front, a conservative extension to
ordered attribute grammars, that deals with the
aforementioned problems. We show why this extension is
suitable for the description of constraint-based
inference algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
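
The need for iteration shows up in even the smallest example: a
constraint may be unusable on one pass and become solvable once a later
constraint has contributed information. The sketch below (hypothetical,
much simpler than ruler-front's attribute-grammar machinery) repeats
constraint propagation until a fixpoint.

def infer(facts, equalities):
    types = dict(facts)
    changed = True
    while changed:                      # iterate to a fixpoint
        changed = False
        for a, b in equalities:         # constraint: a and b share a type
            known = types.get(a) or types.get(b)
            if known and (types.get(a) != known or types.get(b) != known):
                types[a] = types[b] = known
                changed = True
    return types

# ('x', 'y') yields nothing on the first pass; the second pass,
# informed by ('y', 'lit'), types all three.
print(infer({"lit": "Int"}, [("x", "y"), ("y", "lit")]))
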
@Article{Krieger:2011:AES,
author = "Matthias P. Krieger and Alexander Knapp and Burkhart
Wolff",
title = "Automatic and efficient simulation of operation
contracts",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "53--62",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868303",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Operation contracts consisting of pre- and
postconditions are a well-known means of specifying
operations. In this paper we deal with the problem of
operation contract simulation, i.e., determining
operation results satisfying the postconditions based
on input data supplied by the user; simulating
operation contracts is an important technique for
requirements validation and prototyping. Current
approaches to operation contract simulation exhibit
poor performance for large sets of input data or
require additional guidance from the user. We show how
these problems can be alleviated and describe an
efficient as well as fully automatic approach. It is
implemented in our tool OCLexec that generates from
UML/OCL operation contracts corresponding Java
implementations which call a constraint solver at
runtime.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
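
Operation contract simulation amounts to searching for any result that
satisfies the postcondition for the given inputs. The sketch below
brute-forces a small candidate space, where OCLexec instead hands the
postcondition to a constraint solver at run time; the names are
hypothetical.

def simulate(pre, post, inputs, candidates):
    if not pre(*inputs):
        raise ValueError("precondition violated")
    for result in candidates:
        if post(*inputs, result):
            return result               # any model of the postcondition
    raise ValueError("no result satisfies the postcondition")

# contract for integer square root: r*r <= x < (r+1)*(r+1)
pre  = lambda x: x >= 0
post = lambda x, r: r * r <= x < (r + 1) * (r + 1)
print(simulate(pre, post, (10,), range(100)))   # 3
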
@Article{Long:2011:IIM,
author = "Yuheng Long and Sean L. Mooney and Tyler Sondag and
Hridesh Rajan",
title = "Implicit invocation meets safe, implicit concurrency",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "63--72",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868304",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Writing correct and efficient concurrent programs
still remains a challenge. Explicit concurrency is
difficult, error prone, and creates code which is hard
to maintain and debug. This type of concurrency also
treats modular program design and concurrency as
separate goals, where modularity often suffers. To
solve these problems, we are designing a new language
that we call Panini. In this paper, we focus on
Panini's asynchronous, typed events which reconcile the
modularity goal promoted by the implicit invocation
design style with the concurrency goal of exposing
potential concurrency between the execution of subjects
and observers. Since modularity is improved and
concurrency is implicit in Panini, programs are easier
to reason about and maintain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
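Panini's own syntax is not shown in this abstract; the following toy
Scala sketch only illustrates the underlying idea of implicit
invocation with implicit concurrency. The names Event, register, and
announce are ours, not Panini's, and Futures stand in for Panini's
safe concurrency machinery.

    import scala.collection.mutable.ListBuffer
    import scala.concurrent.{Await, Future}
    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.concurrent.duration.Duration

    // Implicit invocation: a subject announces an event without
    // naming its observers; registered handlers are invoked
    // implicitly and may run concurrently.
    final class Event[T] {
      private val handlers = ListBuffer.empty[T => Unit]
      def register(h: T => Unit): Unit = handlers += h
      def announce(payload: T): Future[Unit] =
        Future.traverse(handlers.toList)(h => Future(h(payload))).map(_ => ())
    }

    object Demo {
      def main(args: Array[String]): Unit = {
        val changed = new Event[Int]
        changed.register(v => println(s"logger saw $v"))
        changed.register(v => println(s"ui saw $v"))
        Await.result(changed.announce(42), Duration.Inf)
      }
    }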
@Article{Navas:2011:CBR,
author = "Juan F. Navas and Jean-Philippe Babau and Jacques
Pulou",
title = "A component-based run-time evolution infrastructure
for resource-constrained embedded systems",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "73--82",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868306",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper deals with embedded systems software and
the modification of its architecture and behavior at
execution-time. An incautious implementation of these
features incurs heavy memory and performance
overhead. To accomplish such software evolution
activities in resource-constrained embedded systems, we
propose a component-based run-time evolution
infrastructure that reconciles richness of evolution
alternatives and performance requirements. Our proposal
is based on off-site component reifications, which are
representations of components that allow us to treat
evolution concerns remotely. Hence, the workload to be
processed by the embedded device is alleviated.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hofer:2011:MDS,
author = "Christian Hofer and Klaus Ostermann",
title = "Modular domain-specific language components in
{Scala}",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "83--92",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868307",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programs in domain-specific embedded languages (DSELs)
can be represented in the host language in different
ways, for instance implicitly as libraries, or
explicitly in the form of abstract syntax trees. Each
of these representations has its own strengths and
weaknesses. The implicit approach has good
composability properties, whereas the explicit approach
allows more freedom in making syntactic program
transformations. Traditional designs for DSELs fix the
form of representation, which means that it is not
possible to choose the best representation for a
particular interpretation or transformation. We propose
a new design for implementing DSELs in Scala which
makes it easy to use different program representations
at the same time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
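The trade-off this abstract describes can be made concrete with a
tiny arithmetic DSEL. The sketch below is ours, not the paper's
design: the implicit (shallow) representation composes freely but
cannot be inspected, while the explicit (deep) representation is an
AST that admits syntactic transformations.

    // Implicit (shallow): a program is just host-language values
    // and functions.
    object Shallow {
      type Expr = Int
      def lit(n: Int): Expr = n
      def add(a: Expr, b: Expr): Expr = a + b
    }

    // Explicit (deep): a program is an abstract syntax tree, so
    // transformations such as constant folding are easy.
    object Deep {
      sealed trait Expr
      final case class Lit(n: Int) extends Expr
      final case class Add(a: Expr, b: Expr) extends Expr
      def fold(e: Expr): Expr = e match {
        case Add(a, b) =>
          (fold(a), fold(b)) match {
            case (Lit(x), Lit(y)) => Lit(x + y)
            case (x, y)           => Add(x, y)
          }
        case other => other
      }
    }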
@Article{Wolfinger:2011:AGP,
author = "Reinhard Wolfinger and Markus L{\"o}berbauer and
Markus Jahn and Hanspeter M{\"o}ssenb{\"o}ck",
title = "Adding genericity to a plug-in framework",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "93--102",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868308",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Plug-in components are a means for making feature-rich
applications customizable. Combined with plug-and-play
composition, end users can assemble customized
applications without programming. If plug-and-play
composition is also dynamic, applications can be
reconfigured on the fly to load only components the
user needs for his current work. We have created
Plux.NET, a plug-in framework that supports dynamic
plug-and-play composition. The basis for plug-and-play
in Plux is the composer which replaces programmatic
composition by automatic composition. Components just
specify their requirements and provisions using
metadata. The composer then assembles the components
based on that metadata by matching requirements and
provisions. When the composer needs to reuse
general-purpose components in different parts of an
application, the component model requires genericity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
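The composer's matching of requirements against provisions can be
sketched as follows; Component, provides, requires, and compose are
illustrative names and are not Plux.NET's API.

    // Toy plug-and-play composition: components carry metadata
    // declaring required and provided slot names; the composer
    // wires components by matching the two, with no programmatic
    // composition code inside the components themselves.
    final case class Component(name: String,
                               provides: Set[String],
                               requires: Set[String])

    // Wire every required slot to every component providing it.
    def compose(cs: List[Component]): List[(String, String)] =
      for {
        consumer <- cs
        slot     <- consumer.requires.toList
        provider <- cs if provider.provides.contains(slot)
      } yield consumer.name -> provider.name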
@Article{Schulze:2011:CCF,
author = "Sandro Schulze and Sven Apel and Christian
K{\"a}stner",
title = "Code clones in feature-oriented software product
lines",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "103--112",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868310",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Some limitations of object-oriented mechanisms are
known to cause code clones (e.g., extension using
inheritance). Novel programming paradigms such as
feature-oriented programming (FOP) aim at alleviating
these limitations. However, it is an open issue whether
FOP is really able to avoid code clones or whether it
even facilitates (FOP-related) clones. To address this
issue, we conduct an empirical analysis on ten
feature-oriented software product lines with respect to
code cloning. We found that there is a considerable
number of clones in feature-oriented software product
lines and that a large fraction of these clones is
FOP-related (i.e., caused by limitations of
feature-oriented mechanisms).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tanter:2011:CDA,
author = "{\'E}ric Tanter and Philippe Moret and Walter Binder
and Danilo Ansaloni",
title = "Composition of dynamic analysis aspects",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "113--122",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868311",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Aspect-oriented programming provides a convenient
high-level model to define several kinds of dynamic
analyses, in particular thanks to recent advances in
exhaustive weaving in core libraries. Casting dynamic
analyses as aspects allows the use of a single weaving
infrastructure to apply different analyses to the same
base program, simultaneously. However, even if dynamic
analysis aspects are mutually independent, their mere
presence perturbs the observations of others, since
aspectual computation is
potentially visible to all aspects. Because current
aspect composition approaches do not address this kind
of computational interference, combining different
analysis aspects yields at best unpredictable
results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wurthinger:2011:AED,
author = "Thomas W{\"u}rthinger and Walter Binder and Danilo
Ansaloni and Philippe Moret and Hanspeter
M{\"o}ssenb{\"o}ck",
title = "Applications of enhanced dynamic code evolution for
{Java} in {GUI} development and dynamic aspect-oriented
programming",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "123--126",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868312",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "While dynamic code evolution in object-oriented
systems is an important feature supported by dynamic
languages, there is currently only limited support for
dynamic code evolution in high-performance,
state-of-the-art runtime systems for statically typed
languages, such as the Java Virtual Machine. In this
tool demonstration, we present the Dynamic Code
Evolution VM, which is based on a recent version of
Oracle's state-of-the-art Java HotSpot(TM) VM and
allows unlimited changes to loaded classes at runtime.
Based on the Dynamic Code Evolution VM, we developed an
enhanced version of the Matisse GUI builder (which is
part of the NetBeans IDE) that allows adding GUI
components without restarting the application under
development.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Rompf:2011:LMS,
author = "Tiark Rompf and Martin Odersky",
title = "Lightweight modular staging: a pragmatic approach to
runtime code generation and compiled {DSLs}",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "127--136",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868314",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software engineering demands generality and
abstraction, performance demands specialization and
concretization. Generative programming can provide
both, but the effort required to develop high-quality
program generators likely offsets their benefits, even
if a multi-stage programming language is used. We
present lightweight modular staging, a library-based
multi-stage programming approach that breaks with the
tradition of syntactic quasi-quotation and instead uses
only types to distinguish between binding times.
Through extensive use of component technology,
lightweight modular staging makes an optimizing
compiler framework available at the library level,
allowing programmers to tightly integrate
domain-specific abstractions and optimizations into the
generation process. We argue that lightweight modular
staging enables a form of language virtualization,
i.e.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
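The core idea, distinguishing binding times by types alone rather
than by quasi-quotation, can be sketched with a toy staged type in
the spirit of LMS's Rep[T]; the real framework is typed and backed by
an optimizing compiler, so the untyped Rep below is only a
simplified illustration of ours.

    // Double would be a value known now; Rep is a value known only
    // in the generated code. Written against Rep, power becomes a
    // code generator: the recursion over n runs at staging time,
    // and only the multiplications remain in the residual code.
    sealed trait Rep { def code: String }
    final case class Sym(code: String) extends Rep
    final case class Mul(a: Rep, b: Rep) extends Rep {
      def code: String = s"(${a.code} * ${b.code})"
    }

    def power(b: Rep, n: Int): Rep =
      if (n == 1) b else Mul(b, power(b, n - 1))

    // power(Sym("x"), 3).code evaluates to "(x * (x * x))"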
@Article{Porkolab:2011:DSL,
author = "Zolt{\'a}n Porkolab and {\'A}bel Sinkovics",
title = "Domain-specific language integration with compile-time
parser generator library",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "137--146",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868315",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Smooth integration of domain-specific languages into a
general-purpose host language requires absorbing
domain code written in arbitrary syntax. The
integration should cause minimal syntactical and
semantic overhead and introduce minimal dependency on
external tools. In this paper we discuss a DSL
integration technique for the C++ programming language.
The solution is based on compile-time parsing of the
DSL code. The parser generator is a C++ template
metaprogram reimplementation of a runtime Haskell
parser generator library. The full parsing phase is
executed when the host program is compiled. The library
uses only standard C++ language features, thus our
solution is highly portable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Atkinson:2011:ACT,
author = "Kevin Atkinson and Matthew Flatt and Gary Lindstrom",
title = "{ABI} compatibility through a customizable language",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "147--156",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868316",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "ZL is a C++-compatible language in which high-level
constructs, such as classes, are defined using macros
over a C-like core language. This approach makes many
parts of the language easily customizable. For example,
since the class construct can be defined using macros,
a programmer can have complete control over the memory
layout of objects. Using this capability, a programmer
can mitigate certain problems in software evolution
such as fragile ABIs (Application Binary Interfaces)
due to software changes and incompatible ABIs due to
compiler changes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bordignon:2011:MBK,
author = "Mirko Bordignon and Ulrik Pagh Schultz and Kasper
Stoy",
title = "Model-based kinematics generation for modular
mechatronic toolkits",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "157--166",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868318",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modular robots are mechatronic devices that enable the
construction of highly versatile and flexible robotic
systems whose mechanical structure can be dynamically
modified. The key feature that enables this dynamic
modification is the capability of the individual
modules to connect to each other in multiple ways and
thus generate a number of different mechanical systems,
in contrast with the monolithic fixed structure of
conventional robots. The mechatronic flexibility,
however, complicates the development of models and
programming abstractions for modular robots, since
manually describing and enumerating the full set of
possible interconnections is tedious and error-prone
for real-world robots. In order to allow for a general
formulation of spatial abstractions for modular robots
and to ensure correct and streamlined generation of
code dependent on mechanical properties, we have
developed the Modular Mechatronics Modelling Language
(M3L).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Miao:2011:ITC,
author = "Weiyu Miao and Jeremy G. Siek",
title = "Incremental type-checking for type-reflective
metaprograms",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "167--176",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868319",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Garcia introduces a calculus for type-reflective
metaprogramming that provides much of the power and
flexibility of C++ templates and solves many of its
problems. However, one of the problems that remains is
that the residual program is not type checked until
after meta computation is complete. Ideally, one would
like the type system of the metaprogram to also
guarantee that the residual program will type check, as
is the case in MetaML. However, in a language with
type-reflective metaprogramming, type expressions in
the residual program may be the result of meta
computation, making the MetaML guarantee next to
impossible to achieve.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Grech:2011:JGE,
author = "Neville Grech and Julian Rathke and Bernd Fischer",
title = "{JEqualityGen}: generating equality and hashing
methods",
journal = j-SIGPLAN,
volume = "46",
number = "2",
pages = "177--186",
month = feb,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1942788.1868320",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Feb 14 16:37:34 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Manually implementing equals (for object comparisons)
and hashCode (for object hashing) methods in large
software projects is tedious and error-prone. This is
due to many special cases, such as field shadowing,
comparison between different types, or cyclic object
graphs. Here, we present JEqualityGen, a source code
generator that automatically derives implementations of
these methods. JEqualityGen proceeds in two stages: it
first uses source code reflection in MetaAspectJ to
generate aspects that contain the method
implementations, before it uses weaving on the bytecode
level to insert these into the target application.
JEqualityGen generates not only correct, but efficient
source code that on a typical large-scale Java
application exhibits a performance improvement of more
than two orders of magnitude in the equality operations
generated, compared to an existing system based on
runtime reflection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
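The boilerplate that JEqualityGen derives (for Java) corresponds to
hand-written code like the following Scala equivalent; the Point
class is our example. It shows why manual implementations are
error-prone: the type test, the per-field comparison, and the
consistency of the two methods must all be maintained by hand.

    // Hand-written structural equality and hashing. The two
    // methods must stay consistent: equal objects must produce
    // equal hash codes.
    class Point(val x: Int, val y: Int) {
      override def equals(other: Any): Boolean = other match {
        case that: Point => x == that.x && y == that.y
        case _           => false
      }
      override def hashCode: Int = 31 * x.hashCode + y.hashCode
    }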
@Article{Larus:2011:CWC,
author = "James R. Larus",
title = "The cloud will change everything",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "1--2",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950367",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Yuan:2011:ISD,
author = "Ding Yuan and Jing Zheng and Soyeon Park and Yuanyuan
Zhou and Stefan Savage",
title = "Improving software diagnosability via log
enhancement",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "3--14",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950369",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Veeraraghavan:2011:DPS,
author = "Kaushik Veeraraghavan and Dongyoon Lee and Benjamin
Wester and Jessica Ouyang and Peter M. Chen and Jason
Flinn and Satish Narayanasamy",
title = "{DoublePlay}: parallelizing sequential logging and
replay",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "15--26",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950370",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Casper:2011:HAT,
author = "Jared Casper and Tayo Oguntebi and Sungpack Hong and
Nathan G. Bronson and Christos Kozyrakis and Kunle
Olukotun",
title = "Hardware acceleration of transactional memory on
commodity systems",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "27--38",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950372",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Dalessandro:2011:HNC,
author = "Luke Dalessandro and Fran{\c{c}}ois Carouge and Sean
White and Yossi Lev and Mark Moir and Michael L. Scott
and Michael F. Spear",
title = "{Hybrid NOrec}: a case study in the effectiveness of
best effort hardware transactional memory",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "39--52",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950373",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Singh:2011:EPS,
author = "Abhayendra Singh and Daniel Marino and Satish
Narayanasamy and Todd Millstein and Madan Musuvathi",
title = "Efficient processor support for {DRFx}, a memory model
with exceptions",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "53--66",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950375",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Devietti:2011:RRC,
author = "Joseph Devietti and Jacob Nelson and Tom Bergan and
Luis Ceze and Dan Grossman",
title = "{RCDC}: a relaxed consistency deterministic computer",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "67--78",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950376",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Burnim:2011:SCS,
author = "Jacob Burnim and George Necula and Koushik Sen",
title = "Specifying and checking semantic atomicity for
multithreaded programs",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "79--90",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950377",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Volos:2011:MLP,
author = "Haris Volos and Andres Jaan Tack and Michael M.
Swift",
title = "{Mnemosyne}: lightweight persistent memory",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "91--104",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950379",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Coburn:2011:NHM,
author = "Joel Coburn and Adrian M. Caulfield and Ameen Akel and
Laura M. Grupp and Rajesh K. Gupta and Ranjit Jhala and
Steven Swanson",
title = "{NV-Heaps}: making persistent objects fast and safe
with next-generation, non-volatile memories",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "105--118",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950380",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Schupbach:2011:DLA,
author = "Adrian Sch{\"u}pbach and Andrew Baumann and Timothy
Roscoe and Simon Peter",
title = "A declarative language approach to device
configuration",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "119--132",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950382",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Ryzhyk:2011:IDD,
author = "Leonid Ryzhyk and John Keys and Balachandra Mirla and
Arun Raghunath and Mona Vij and Gernot Heiser",
title = "Improved device driver reliability through hardware
verification reuse",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "133--144",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950383",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Hashmi:2011:CNI,
author = "Atif Hashmi and Andrew Nere and James Jamal Thomas and
Mikko Lipasti",
title = "A case for neuromorphic {ISAs}",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "145--158",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950385",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Ransford:2011:MSS,
author = "Benjamin Ransford and Jacob Sorber and Kevin Fu",
title = "{Mementos}: system support for long-running
computation on {RFID}-scale devices",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "159--170",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950386",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Koukoumidis:2011:PC,
author = "Emmanouil Koukoumidis and Dimitrios Lymberopoulos and
Karin Strauss and Jie Liu and Doug Burger",
title = "Pocket cloudlets",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "171--184",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950387",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Sharma:2011:BMS,
author = "Navin Sharma and Sean Barker and David Irwin and
Prashant Shenoy",
title = "{Blink}: managing server clusters on intermittent
power",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "185--198",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950389",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Hoffmann:2011:DKR,
author = "Henry Hoffmann and Stelios Sidiroglou and Michael
Carbin and Sasa Misailovic and Anant Agarwal and Martin
Rinard",
title = "Dynamic knobs for responsive power-aware computing",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "199--212",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950390",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Liu:2011:FSD,
author = "Song Liu and Karthik Pattabiraman and Thomas
Moscibroda and Benjamin G. Zorn",
title = "{Flikker}: saving {DRAM} refresh-power through
critical data partitioning",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "213--224",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950391",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Deng:2011:MAL,
author = "Qingyuan Deng and David Meisner and Luiz Ramos and
Thomas F. Wenisch and Ricardo Bianchini",
title = "{MemScale}: active low-power modes for main memory",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "225--238",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950392",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Gao:2011:TMH,
author = "Qi Gao and Wenbin Zhang and Zhezhe Chen and Mai Zheng
and Feng Qin",
title = "{2ndStrike}: toward manifesting hidden concurrency
typestate bugs",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "239--250",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950394",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Zhang:2011:CDC,
author = "Wei Zhang and Junghee Lim and Ramya Olichandran and
Joel Scherpelz and Guoliang Jin and Shan Lu and Thomas
Reps",
title = "{ConSeq}: detecting concurrency bugs through
sequential errors",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "251--264",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950395",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Chipounov:2011:SPV,
author = "Vitaly Chipounov and Volodymyr Kuznetsov and George
Candea",
title = "{S2E}: a platform for in-vivo multi-path analysis of
software systems",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "265--278",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950396",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Hofmann:2011:EOS,
author = "Owen S. Hofmann and Alan M. Dunn and Sangman Kim and
Indrajit Roy and Emmett Witchel",
title = "Ensuring operating system kernel integrity with
{OSck}",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "279--290",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950398",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Porter:2011:RLT,
author = "Donald E. Porter and Silas Boyd-Wickizer and Jon
Howell and Reuben Olinsky and Galen C. Hunt",
title = "Rethinking the library {OS} from the top down",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "291--304",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950399",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Palix:2011:FLT,
author = "Nicolas Palix and Ga{\"e}l Thomas and Suman Saha and
Christophe Calv{\`e}s and Julia Lawall and Gilles
Muller",
title = "Faults in {Linux}: ten years later",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "305--318",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950401",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Esmaeilzadeh:2011:LBL,
author = "Hadi Esmaeilzadeh and Ting Cao and Xi Yang and Stephen
M. Blackburn and Kathryn S. McKinley",
title = "Looking back on the language and hardware revolutions:
measured power, performance, and scaling",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "319--332",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950402",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Nguyen:2011:SCS,
author = "Donald Nguyen and Keshav Pingali",
title = "Synthesizing concurrent schedulers for irregular
algorithms",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "333--344",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950404",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Hoang:2011:ECT,
author = "Giang Hoang and Robby Bruce Findler and Russ Joseph",
title = "Exploring circuit timing-aware language and
compilation",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "345--356",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950405",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Farhad:2011:OAM,
author = "Sardar M. Farhad and Yousun Ko and Bernd Burgstaller
and Bernhard Scholz",
title = "Orchestration by approximation: mapping stream
programs onto multicore architectures",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "357--368",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950406",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Zhang:2011:FED,
author = "Eddy Z. Zhang and Yunlian Jiang and Ziyu Guo and Kai
Tian and Xipeng Shen",
title = "On-the-fly elimination of dynamic irregularities for
{GPU} computing",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "369--380",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950408",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Hormati:2011:SPS,
author = "Amir H. Hormati and Mehrzad Samadi and Mark Woh and
Trevor Mudge and Scott Mahlke",
title = "{Sponge}: portable stream programming on graphics
engines",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "381--392",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950409",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Kamruzzaman:2011:ICP,
author = "Md Kamruzzaman and Steven Swanson and Dean M.
Tullsen",
title = "Inter-core prefetching for multicore processors using
migrating helper threads",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "393--404",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950411",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Hayashizaki:2011:IPT,
author = "Hiroshige Hayashizaki and Peng Wu and Hiroshi Inoue
and Mauricio J. Serrano and Toshio Nakatani",
title = "Improving the performance of trace-based systems by
false loop filtering",
journal = j-SIGPLAN,
volume = "46",
number = "3",
pages = "405--418",
month = mar,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1961296.1950412",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:08 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '11 conference proceedings",
}
@Article{Bala:2011:DTD,
author = "Vasanth Bala and Evelyn Duesterwald and Sanjeev
Banerjia",
title = "{Dynamo}: a transparent dynamic optimization system",
journal = j-SIGPLAN,
volume = "46",
number = "4",
pages = "41--52",
month = apr,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1988042.1988044",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:07 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe the design and implementation of Dynamo, a
software dynamic optimization system that is capable of
transparently improving the performance of a native
instruction stream as it executes on the processor. The
input native instruction stream to Dynamo can be
dynamically generated (by a JIT for example), or it can
come from the execution of a statically compiled native
binary. This paper evaluates the Dynamo system in the
latter, more challenging situation, in order to
emphasize the limits, rather than the potential, of the
system. Our experiments demonstrate that even
statically optimized native binaries can be accelerated
by Dynamo, and often by a significant degree.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Claessen:2011:QLT,
author = "Koen Claessen and John Hughes",
title = "{QuickCheck}: a lightweight tool for random testing of
{Haskell} programs",
journal = j-SIGPLAN,
volume = "46",
number = "4",
pages = "53--64",
month = apr,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1988042.1988046",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:07 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "QuickCheck is a tool which aids the Haskell programmer
in formulating and testing properties of programs.
Properties are described as Haskell functions, and can
be automatically tested on random input, but it is also
possible to define custom test data generators. We
present a number of case studies, in which the tool was
successfully used, and also point out some pitfalls to
avoid. Random testing is especially suitable for
functional programs because properties can be stated at
a fine grain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
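QuickCheck itself is a Haskell library; to keep all sketches here in
one language, the following is a toy Scala re-implementation of the
idea rather than QuickCheck's API. The forAll name and the generator
are ours: a property is a boolean function tested on many random
inputs, and the first counterexample found is reported.

    import scala.util.Random

    object ToyQuickCheck {
      // Test a property on random inputs; report a counterexample.
      def forAll(trials: Int)(gen: Random => List[Int])(prop: List[Int] => Boolean): Unit = {
        val rnd = new Random(0) // fixed seed for reproducibility
        Iterator.fill(trials)(gen(rnd)).find(xs => !prop(xs)) match {
          case Some(xs) => println(s"Falsified by: $xs")
          case None     => println(s"Passed $trials tests.")
        }
      }

      def main(args: Array[String]): Unit = {
        val genList = (r: Random) => List.fill(r.nextInt(20))(r.nextInt(100))
        forAll(100)(genList)(xs => xs.reverse.reverse == xs) // passes
        forAll(100)(genList)(xs => xs.reverse == xs)         // finds a counterexample
      }
    }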
@Article{Arnold:2011:AOJ,
author = "Matthew Arnold and Stephen Fink and David Grove and
Michael Hind and Peter F. Sweeney",
title = "Adaptive optimization in the {Jalape{\~n}o JVM}",
journal = j-SIGPLAN,
volume = "46",
number = "4",
pages = "65--83",
month = apr,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1988042.1988048",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:07 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Future high-performance virtual machines will improve
performance through sophisticated online
feedback-directed optimizations. This paper presents
the architecture of the Jalape{\~n}o Adaptive Optimization
System, a system to support leading-edge virtual
machine technology and enable ongoing research on
online feedback-directed optimizations. We describe the
extensible system architecture, based on a federation
of threads with asynchronous communication. We present
an implementation of the general architecture that
supports adaptive multi-level optimization based purely
on statistical sampling. We empirically demonstrate
that this profiling technique has low overhead and can
improve startup and steady-state performance, even
without the presence of online feedback-directed
optimizations. The paper also describes and evaluates
an online feedback-directed inlining optimization based
on statistical edge sampling.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ishtiaq:2011:BAL,
author = "Samin Ishtiaq and Peter W. O'Hearn",
title = "{BI} as an assertion language for mutable data
structures",
journal = j-SIGPLAN,
volume = "46",
number = "4",
pages = "84--96",
month = apr,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1988042.1988050",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 24 10:55:07 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reynolds has developed a logic for reasoning about
mutable data structures in which the pre- and
postconditions are written in an intuitionistic logic
enriched with a spatial form of conjunction. We
investigate the approach from the point of view of the
logic BI of bunched implications of O'Hearn and Pym. We
begin by giving a model in which the law of the
excluded middle holds, thus showing that the approach
is compatible with classical logic. The relationship
between the intuitionistic and classical versions of
the system is established by a translation, analogous
to a translation from intuitionistic logic into the
modal logic S4.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
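The spatial conjunction this abstract refers to has a standard
concrete reading in the separation-logic literature; the formulas
below follow that standard reading and are not quoted from the paper.

    $(x \mapsto a) \ast (y \mapsto b)$

asserts that the heap splits into two disjoint parts, one cell at $x$
holding $a$ and one cell at $y$ holding $b$; disjointness gives
$x \neq y$ for free, so a triple such as

    $\{\, (x \mapsto a) \ast (y \mapsto b) \,\}\ [x] := c\
     \{\, (x \mapsto c) \ast (y \mapsto b) \,\}$

is valid: the update through $x$ cannot alias $y$.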
@Article{Virlet:2011:SSB,
author = "Bruno Virlet and Xing Zhou and Jean Pierre Giacalone
and Bob Kuhn and Maria J. Garzaran and David Padua",
title = "Scheduling of stream-based real-time applications for
heterogeneous systems",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "1--10",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967679",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Chattopadhyay:2011:SBS,
author = "Sudipta Chattopadhyay and Abhik Roychoudhury",
title = "Static bus schedule aware scratchpad allocation in
multiprocessors",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "11--20",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967680",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Albert:2011:TLA,
author = "Elvira Albert and Puri Arenas and Samir Genaim and
Damiano Zanardini",
title = "Task-level analysis for a language with async\slash
finish parallelism",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "21--30",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967681",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Chang:2011:LCW,
author = "Li-Pin Chang and Li-Chun Huang",
title = "A low-cost wear-leveling algorithm for block-mapping
solid-state disks",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "31--40",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967683",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multilevel flash memory cells double or even triple
storage density, producing affordable solid-state disks
for end users. However, flash lifetime is becoming a
critical issue in the popularity of solid-state disks.
Wear-leveling methods can prevent flash-storage devices
from prematurely retiring any portions of flash memory.
The two practical challenges of wear-leveling design
are implementation cost and tuning complexity. This
study proposes a new wear-leveling design that features
both simplicity and adaptiveness. This design requires
no new data structures, but utilizes the intelligence
available in sector-translating algorithms. Using an
on-line tuning method, this design adaptively tunes
itself to reach good balance between wear evenness and
overhead. A series of trace-driven simulations show
that the proposed design outperforms a competitive
existing design in terms of wear evenness and overhead
reduction. This study also presents a prototype that
proves the feasibility of this wear-leveling design in
real solid-state disks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Saha:2011:AIS,
author = "Suman Saha and Julia Lawall and Gilles Muller",
title = "An approach to improving the structure of
error-handling code in the {Linux} kernel",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "41--50",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967684",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Gray:2011:TCE,
author = "Ian Gray and Neil C. Audsley",
title = "Targeting complex embedded architectures by combining
the multicore communications {API} ({{\tt mcapi}}) with
compile-time virtualisation",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "51--60",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967685",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Benveniste:2011:DRT,
author = "Albert Benveniste and Timothy Bourke and Beno{\^\i}t
Caillaud and Marc Pouzet",
title = "Divide and recycle: types and compilation for a hybrid
synchronous language",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "61--70",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967687",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Gamatie:2011:SAS,
author = "Abdoulaye Gamati{\'e} and Laure Gonnord",
title = "Static analysis of synchronous programs in {Signal}
for efficient design of multi-clocked embedded systems",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "71--80",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967688",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Berthier:2011:SPD,
author = "Nicolas Berthier and Florence Maraninchi and Laurent
Mounier",
title = "Synchronous programming of device drivers for global
resource control in embedded operating systems",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "81--90",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967689",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Wang:2011:DBM,
author = "Man Wang and Zhiyuan Li and Feng Li and Xiaobing Feng
and Saurabh Bagchi and Yung-Hsiang Lu",
title = "Dependence-based multi-level tracing and replay for
wireless sensor networks debugging",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "91--100",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967691",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '10 conference proceedings",
}
@Article{Thomas:2011:LOS,
author = "Johnson J. Thomas and Sebastian Fischmeister and
Deepak Kumar",
title = "Lowering overhead in sampling-based execution
monitoring and tracing",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "101--110",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967692",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '11 conference proceedings",
}
@Article{Navabpour:2011:SDT,
author = "Samaneh Navabpour and Borzoo Bonakdarpour and
Sebastian Fischmeister",
title = "Software debugging and testing using the abstract
diagnosis theory",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "111--120",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967693",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '11 conference proceedings",
}
@Article{Cullmann:2011:CPA,
author = "Christoph Cullmann",
title = "Cache persistence analysis: a novel approachtheory and
practice",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "121--130",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967695",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '11 conference proceedings",
}
@Article{Sarkar:2011:PTM,
author = "Abhik Sarkar and Frank Mueller and Harini Ramaprasad",
title = "Predictable task migration for locked caches in
multi-core systems",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "131--140",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967696",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '11 conference proceedings",
}
@Article{Althaus:2011:PEP,
author = "Ernst Althaus and Sebastian Altmeyer and Rouven
Naujoks",
title = "Precise and efficient parametric path analysis",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "141--150",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967697",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '11 conference proceedings",
}
@Article{Jang:2011:ISA,
author = "Choonki Jang and Jungwon Kim and Jaejin Lee and
Hee-Seok Kim and Dong-Hoon Yoo and Sukjin Kim and
Hong-Seok Kim and Soojung Ryu",
title = "An instruction-scheduling-aware data partitioning
technique for coarse-grained reconfigurable
architectures",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "151--160",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967699",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '11 conference proceedings",
}
@Article{Bhagat:2011:GPP,
author = "Indu Bhagat and Enric Gibert and Jes{\'u}s S{\'a}nchez
and Antonio Gonz{\'a}lez",
title = "Global productiveness propagation: a code optimization
technique to speculatively prune useless narrow
computations",
journal = j-SIGPLAN,
volume = "46",
number = "5",
pages = "161--170",
month = may,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2016603.1967700",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Aug 18 13:30:54 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '11 conference proceedings",
}
@Article{Prabhu:2011:CSL,
author = "Prakash Prabhu and Soumyadeep Ghosh and Yun Zhang and
Nick P. Johnson and David I. August",
title = "Commutative set: a language extension for implicit
parallel programming",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "1--11",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993500",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pingali:2011:TPA,
author = "Keshav Pingali and Donald Nguyen and Milind Kulkarni
and Martin Burtscher and M. Amber Hassaan and Rashid
Kaleem and Tsung-Hsien Lee and Andrew Lenharth and
Roman Manevich and Mario M{\'e}ndez-Lojo and Dimitrios
Prountzos and Xin Sui",
title = "The tao of parallelism in algorithms",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "12--25",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993501",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Raman:2011:POU,
author = "Arun Raman and Hanjun Kim and Taewook Oh and Jae W.
Lee and David I. August",
title = "Parallelism orchestration using {DoPE}: the degree of
parallelism executive",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "26--37",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993502",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hawkins:2011:DRS,
author = "Peter Hawkins and Alex Aiken and Kathleen Fisher and
Martin Rinard and Mooly Sagiv",
title = "Data representation synthesis",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "38--49",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993504",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gulwani:2011:SGC,
author = "Sumit Gulwani and Vijay Anand Korthikanti and Ashish
Tiwari",
title = "Synthesizing geometry constructions",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "50--61",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993505",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gulwani:2011:SLF,
author = "Sumit Gulwani and Susmit Jha and Ashish Tiwari and
Ramarathnam Venkatesan",
title = "Synthesis of loop-free programs",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "62--73",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993506",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bohm:2011:GJT,
author = "Igor B{\"o}hm and Tobias J. K. Edler von Koch and
Stephen C. Kyle and Bj{\"o}rn Franke and Nigel Topham",
title = "Generalized just-in-time trace compilation using a
parallel task farm in a dynamic binary translator",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "74--85",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993508",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jung:2011:BES,
author = "Changhee Jung and Silvius Rus and Brian P. Railing and
Nathan Clark and Santosh Pande",
title = "{Brainy}: effective selection of data structures",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "86--97",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993509",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhou:2011:SBA,
author = "Hucheng Zhou and Wenguang Chen and Fred Chow",
title = "An {SSA}-based algorithm for optimal speculative code
motion under an execution profile",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "98--108",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993510",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Li:2011:CHD,
author = "Xun Li and Mohit Tiwari and Jason K. Oberg and Vineeth
Kashyap and Frederic T. Chong and Timothy Sherwood and
Ben Hardekopf",
title = "{Caisson}: a hardware description language for secure
information flow",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "109--120",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993512",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Murray:2011:SAO,
author = "Derek Gordon Murray and Michael Isard and Yuan Yu",
title = "{Steno}: automatic optimization of declarative
queries",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "121--131",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993513",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tobin-Hochstadt:2011:LL,
author = "Sam Tobin-Hochstadt and Vincent St-Amour and Ryan
Culpepper and Matthew Flatt and Matthias Felleisen",
title = "Languages as libraries",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "132--141",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993514",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jablin:2011:ACG,
author = "Thomas B. Jablin and Prakash Prabhu and James A.
Jablin and Nick P. Johnson and Stephen R. Beard and
David I. August",
title = "Automatic {CPU--GPU} communication management and
optimization",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "142--151",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993516",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Prasad:2011:ACM,
author = "Ashwin Prasad and Jayvant Anantpur and R.
Govindarajan",
title = "Automatic compilation of {MATLAB} programs for
synergistic execution on heterogeneous processors",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "152--163",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993517",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sampson:2011:EAD,
author = "Adrian Sampson and Werner Dietl and Emily Fortuna and
Danushen Gnanapragasam and Luis Ceze and Dan Grossman",
title = "{EnerJ}: approximate data types for safe and general
low-power computation",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "164--174",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sarkar:2011:UPM,
author = "Susmit Sarkar and Peter Sewell and Jade Alglave and
Luc Maranget and Derek Williams",
title = "Understanding {POWER} multiprocessors",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "175--186",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993520",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kuperstein:2011:PCA,
author = "Michael Kuperstein and Martin Vechev and Eran Yahav",
title = "Partial-coherence abstractions for relaxed memory
models",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "187--198",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993521",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Marino:2011:CSP,
author = "Daniel Marino and Abhayendra Singh and Todd Millstein
and Madanlal Musuvathi and Satish Narayanasamy",
title = "A case for an {SC}-preserving compiler",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "199--210",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The most intuitive memory consistency model for
shared-memory multi-threaded programming is sequential
consistency (SC). However, current concurrent
programming languages support a relaxed model, as such
relaxations are deemed necessary for enabling important
optimizations. This paper demonstrates that an
SC-preserving compiler, one that ensures that every SC
behavior of a compiler-generated binary is an SC
behavior of the source program, retains most of the
performance benefits of an optimizing compiler. The key
observation is that a large class of optimizations
crucial for performance are either already
SC-preserving or can be modified to preserve SC while
retaining much of their effectiveness. An SC-preserving
compiler, obtained by restricting the optimization
phases in LLVM, a state-of-the-art C/C++ compiler,
incurs an average slowdown of 3.8\% and a maximum
slowdown of 34\% on a set of 30 programs from the
SPLASH-2, PARSEC, and SPEC CINT2006 benchmark
suites.\par
While the performance overhead of preserving SC in the
compiler is much less than previously assumed, it might
still be unacceptable for certain applications. We
believe there are several avenues for improving
performance without giving up SC-preservation. In this
vein, we observe that the overhead of our SC-preserving
compiler arises mainly from its inability to
aggressively perform a class of optimizations we
identify as eager-load optimizations. This class
includes common-subexpression elimination, constant
propagation, global value numbering, and common cases
of loop-invariant code motion. We propose a notion of
interference checks in order to enable eager-load
optimizations while preserving SC. Interference checks
expose to the compiler a commonly used hardware
speculation mechanism that can efficiently detect
whether a particular variable has changed its value
since last read.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "LLVM compiler suite; sequential consistency (SC)",
}
@Article{Beckman:2011:PMS,
author = "Nels E. Beckman and Aditya V. Nori",
title = "Probabilistic, modular and scalable inference of
typestate specifications",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "211--221",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993524",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kobayashi:2011:PAC,
author = "Naoki Kobayashi and Ryosuke Sato and Hiroshi Unno",
title = "Predicate abstraction and {CEGAR} for higher-order
model checking",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "222--233",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993525",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chlipala:2011:MAV,
author = "Adam Chlipala",
title = "Mostly-automated verification of low-level programs in
computational separation logic",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "234--245",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993526",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lee:2011:TGR,
author = "Kyu Hyung Lee and Yunhui Zheng and Nick Sumner and
Xiangyu Zhang",
title = "Toward generating reducible replay logs",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "246--257",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993528",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Godefroid:2011:HOT,
author = "Patrice Godefroid",
title = "Higher-order test generation",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "258--269",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993529",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Xu:2011:LHP,
author = "Guoqing Xu and Michael D. Bond and Feng Qin and Atanas
Rountev",
title = "{LeakChaser}: helping programmers narrow down causes
of memory leaks",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "270--282",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993530",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yang:2011:FUB,
author = "Xuejun Yang and Yang Chen and Eric Eide and John
Regehr",
title = "Finding and understanding bugs in {C} compilers",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "283--294",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993532",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compilers should be correct. To improve the quality of
C compilers, we created Csmith, a randomized test-case
generation tool, and spent three years using it to find
compiler bugs. During this period we reported more than
325 previously unknown bugs to compiler developers.
Every compiler we tested was found to crash and also to
silently generate wrong code when presented with valid
input. In this paper we present our compiler-testing
tool and the results of our bug-hunting study. Our
first contribution is to advance the state of the art
in compiler testing. Unlike previous tools, Csmith
generates programs that cover a large subset of C while
avoiding the undefined and unspecified behaviors that
would destroy its ability to automatically find
wrong-code bugs. Our second contribution is a
collection of qualitative and quantitative results
about the bugs we have found in open-source C
compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tristan:2011:EVG,
author = "Jean-Baptiste Tristan and Paul Govereau and Greg
Morrisett",
title = "Evaluating value-graph translation validation for
{LLVM}",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "295--305",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993533",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sevcik:2011:SOS,
author = "Jaroslav Sevc{\'\i}k",
title = "Safe optimisations for shared-memory concurrent
programs",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "306--316",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Harris:2011:STT,
author = "William R. Harris and Sumit Gulwani",
title = "Spreadsheet table transformations from examples",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "317--328",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993536",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Meng:2011:SEG,
author = "Na Meng and Miryung Kim and Kathryn S. McKinley",
title = "Systematic editing: generating program transformations
from an example",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "329--342",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993537",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Srivastava:2011:SPO,
author = "Varun Srivastava and Michael D. Bond and Kathryn S.
McKinley and Vitaly Shmatikov",
title = "A security policy oracle: detecting security holes
using multiple {API} implementations",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "343--354",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ansel:2011:LIS,
author = "Jason Ansel and Petr Marchenko and Ulfar Erlingsson
and Elijah Taylor and Brad Chen and Derek L. Schuff and
David Sehr and Cliff L. Biffle and Bennet Yee",
title = "Language-independent sandboxing of just-in-time
compilation and self-modifying code",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "355--366",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993540",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zeng:2011:CCH,
author = "Qiang Zeng and Dinghao Wu and Peng Liu",
title = "{Cruiser}: concurrent heap buffer overflow monitoring
using lock-free data structures",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "367--377",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993541",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lucia:2011:IUC,
author = "Brandon Lucia and Benjamin P. Wood and Luis Ceze",
title = "Isolating and understanding concurrency errors using
reconstructed execution fragments",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "378--388",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993543",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jin:2011:AAV,
author = "Guoliang Jin and Linhai Song and Wei Zhang and Shan Lu
and Ben Liblit",
title = "Automated atomicity-violation fixing",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "389--400",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993544",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Burnim:2011:NRC,
author = "Jacob Burnim and Tayfun Elmas and George Necula and
Koushik Sen",
title = "{NDSeq}: runtime checking for nondeterministic
sequential specifications of parallel correctness",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "401--414",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993545",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jin:2011:GCM,
author = "Dongyun Jin and Patrick O'Neil Meredith and Dennis
Griffith and Grigore Ro{\c{s}}u",
title = "Garbage collection for monitoring parametric
properties",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "415--424",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993547",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Parr:2011:LFA,
author = "Terence Parr and Kathleen Fisher",
title = "{LL(*)}: the foundation of the {ANTLR} parser
generator",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "425--436",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993548",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite the power of Parser Expression Grammars (PEGs)
and GLR, parsing is not a solved problem. Adding
nondeterminism (parser speculation) to traditional LL
and LR parsers can lead to unexpected parse-time
behavior and introduces practical issues with error
handling, single-step debugging, and side-effecting
embedded grammar actions. This paper introduces the
LL(*) parsing strategy and an associated grammar
analysis algorithm that constructs LL(*) parsing
decisions from ANTLR grammars. At parse-time, decisions
gracefully throttle up from conventional fixed $ k \geq 1 $
lookahead to arbitrary lookahead and, finally, fail
over to backtracking depending on the complexity of the
parsing decision and the input symbols. LL(*) parsing
strength reaches into the context-sensitive languages,
in some cases beyond what GLR and PEGs can express. By
statically removing as much speculation as possible,
LL(*) provides the expressivity of PEGs while retaining
LL's good error handling and unrestricted grammar
actions. Widespread use of ANTLR (over 70,000
downloads/year) shows that it is effective for a wide
variety of applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jose:2011:CCC,
author = "Manu Jose and Rupak Majumdar",
title = "Cause clue clauses: error localization using maximum
satisfiability",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "437--446",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993550",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Budi:2011:AMA,
author = "Aditya Budi and David Lo and Lingxiao Jiang and
Lucia",
title = "$ k b $-anonymity: a model for anonymized
behaviour-preserving test and debugging data",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "447--457",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993551",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Garcia:2011:KRR,
author = "Saturnino Garcia and Donghwan Jeon and Christopher M.
Louie and Michael Bedford Taylor",
title = "{Kremlin}: rethinking and rebooting {{\tt gprof}} for
the multicore age",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "458--469",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993553",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many recent parallelization tools lower the barrier
for parallelizing a program, but overlook one of the
first questions that a programmer needs to answer:
which parts of the program should I spend time
parallelizing?\par
This paper examines Kremlin, an automatic tool that,
given a serial version of a program, will make
recommendations to the user as to what regions (e.g.
loops or functions) of the program to attack first.
Kremlin introduces a novel hierarchical critical path
analysis and develops a new metric for estimating the
potential of parallelizing a region: self-parallelism.
We further introduce the concept of a parallelism
planner, which provides a ranked order of specific
regions to the programmer that are likely to have the
largest performance impact when parallelized. Kremlin
supports multiple planner personalities, which allow
the planner to more effectively target a particular
programming environment or class of machine.\par
We demonstrate the effectiveness of one such
personality, an OpenMP planner, by comparing versions
of programs that are parallelized according to
Kremlin's plan against third-party manually
parallelized versions. The results show that Kremlin's
OpenMP planner is highly effective, producing plans
whose performance is typically comparable to, and
sometimes much better than, manual parallelization. At
the same time, these plans would require that the user
parallelize significantly fewer regions of the
program.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sato:2011:APM,
author = "Shigeyuki Sato and Hideya Iwasaki",
title = "Automatic parallelization via matrix multiplication",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "470--479",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993554",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Udupa:2011:AEB,
author = "Abhishek Udupa and Kaushik Rajan and William Thies",
title = "{ALTER}: exploiting breakable dependences for
parallelization",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "480--491",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993555",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Srivastava:2011:PBI,
author = "Saurabh Srivastava and Sumit Gulwani and Swarat
Chaudhuri and Jeffrey S. Foster",
title = "Path-based inductive synthesis for program inversion",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "492--503",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993557",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Person:2011:DIS,
author = "Suzette Person and Guowei Yang and Neha Rungta and
Sarfraz Khurshid",
title = "Directed incremental symbolic execution",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "504--515",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993558",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{DElia:2011:MHC,
author = "Daniele Cono D'Elia and Camil Demetrescu and Irene
Finocchi",
title = "Mining hot calling contexts in small space",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "516--527",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993559",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kim:2011:VSC,
author = "Deokhwan Kim and Martin C. Rinard",
title = "Verification of semantic commutativity conditions and
inverse operations on linked data structures",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "528--541",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993561",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kulkarni:2011:ECL,
author = "Milind Kulkarni and Donald Nguyen and Dimitrios
Prountzos and Xin Sui and Keshav Pingali",
title = "Exploiting the commutativity lattice",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "542--555",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993562",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Perez:2011:SLS,
author = "Juan Antonio Navarro P{\'e}rez and Andrey
Rybalchenko",
title = "Separation logic $+$ superposition calculus $=$ heap
theorem prover",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "556--566",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993563",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dillig:2011:PCM,
author = "Isil Dillig and Thomas Dillig and Alex Aiken and Mooly
Sagiv",
title = "Precise and compact modular procedure summaries for
heap manipulating programs",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "567--577",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993565",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bouajjani:2011:IPA,
author = "Ahmed Bouajjani and Cezara Dragoi and Constantin Enea
and Mihaela Sighireanu",
title = "On inter-procedural analysis of programs with lists
and data",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "578--589",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993566",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liang:2011:SAR,
author = "Percy Liang and Mayur Naik",
title = "Scaling abstraction refinement via pruning",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "590--601",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993567",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Altidor:2011:TWC,
author = "John Altidor and Shan Shan Huang and Yannis
Smaragdakis",
title = "Taming the wildcards: combining definition- and
use-site variance",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "602--613",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tate:2011:TWJ,
author = "Ross Tate and Alan Leung and Sorin Lerner",
title = "Taming wildcards in {Java}'s type system",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "614--627",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993570",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ziarek:2011:CAE,
author = "Lukasz Ziarek and KC Sivaramakrishnan and Suresh
Jagannathan",
title = "Composable asynchronous events",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "628--639",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993572",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Best:2011:SST,
author = "Micah J. Best and Shane Mottishaw and Craig Mustard
and Mark Roth and Alexandra Fedorova and Andrew
Brownsword",
title = "Synchronization via scheduling: techniques for
efficiently managing shared state",
journal = j-SIGPLAN,
volume = "46",
number = "6",
pages = "640--652",
month = jun,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1993316.1993573",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 10:23:33 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bacon:2011:VAH,
author = "David F. Bacon",
title = "Virtualization in the age of heterogeneous machines",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "1--2",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952684",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Du:2011:PPV,
author = "Jiaqing Du and Nipun Sehrawat and Willy Zwaenepoel",
title = "Performance profiling of virtual machines",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "3--14",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952686",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Nikolaev:2011:PXF,
author = "Ruslan Nikolaev and Godmar Back",
title = "{Perfctr-Xen}: a framework for performance counter
virtualization",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "15--26",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952687",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhao:2011:DCC,
author = "Qin Zhao and David Koh and Syed Raza and Derek
Bruening and Weng-Fai Wong and Saman Amarasinghe",
title = "Dynamic cache contention detection in multi-threaded
applications",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "27--38",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952688",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wang:2011:RVM,
author = "Kun Wang and Jia Rao and Cheng-Zhong Xu",
title = "Rethink the virtual machine template",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "39--50",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952690",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cecchet:2011:DVD,
author = "Emmanuel Cecchet and Rahul Singh and Upendra Sharma
and Prashant Shenoy",
title = "{Dolly}: virtualization-driven database provisioning
for the cloud",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "51--62",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952691",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Le:2011:REV,
author = "Michael Le and Yuval Tamir",
title = "{ReHype}: enabling {VM} survival across hypervisor
failures",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "63--74",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952692",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Park:2011:FSE,
author = "Eunbyung Park and Bernhard Egger and Jaejin Lee",
title = "Fast and space-efficient virtual machine
checkpointing",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "75--86",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952694",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhang:2011:FRC,
author = "Irene Zhang and Alex Garthwaite and Yury Baskakov and
Kenneth C. Barr",
title = "Fast restore of checkpointed memory using working set
estimation",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "87--98",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952695",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kourai:2011:FCP,
author = "Kenichi Kourai",
title = "Fast and correct performance recovery of operating
systems using a virtual machine monitor",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "99--110",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952696",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Svard:2011:EDC,
author = "Petter Sv{\"a}rd and Benoit Hudzia and Johan Tordsson
and Erik Elmroth",
title = "Evaluation of delta compression techniques for
efficient live migration of large virtual machines",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "111--120",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952698",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wood:2011:CDP,
author = "Timothy Wood and K. K. Ramakrishnan and Prashant
Shenoy and Jacobus van der Merwe",
title = "{CloudNet}: dynamic pooling of cloud resources by live
{WAN} migration of virtual machines",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "121--132",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952699",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zheng:2011:WAL,
author = "Jie Zheng and Tze Sing Eugene Ng and Kunwadee
Sripanidkulchai",
title = "Workload-aware live storage migration for clouds",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "133--144",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952700",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Litty:2011:PAI,
author = "Lionel Litty and David Lie",
title = "Patch auditing in infrastructure as a service clouds",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "145--156",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952702",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Payer:2011:FGU,
author = "Mathias Payer and Thomas R. Gross",
title = "Fine-grained user-space security through
virtualization",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "157--168",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952703",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lange:2011:MOV,
author = "John R. Lange and Kevin Pedretti and Peter Dinda and
Patrick G. Bridges and Chang Bae and Philip Soltero and
Alexander Merritt",
title = "Minimal-overhead virtualization of a large scale
supercomputer",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "169--180",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952705",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Xia:2011:VWB,
author = "Lei Xia and Sanjay Kumar and Xue Yang and Praveen
Gopalakrishnan and York Liu and Sebastian Schoenberg
and Xingang Guo",
title = "Virtual {WiFi}: bring virtualization from wired to
wireless",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "181--192",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952706",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lange:2011:SSV,
author = "John R. Lange and Peter Dinda",
title = "{SymCall}: symbiotic virtualization through
{VMM}-to-guest upcalls",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "193--204",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952707",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Williams:2011:OHM,
author = "Dan Williams and Hani Jamjoom and Yew-Huey Liu and
Hakim Weatherspoon",
title = "{Overdriver}: handling memory overload in an
oversubscribed cloud",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "205--216",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952709",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wang:2011:SHS,
author = "Xiaolin Wang and Jiarui Zang and Zhenlin Wang and
Yingwei Luo and Xiaoming Li",
title = "Selective hardware\slash software memory
virtualization",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "217--226",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952710",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Roy:2011:HBR,
author = "Amitabha Roy and Steven Hand and Tim Harris",
title = "Hybrid binary rewriting for memory access
instrumentation",
journal = j-SIGPLAN,
volume = "46",
number = "7",
pages = "227--238",
month = jul,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2007477.1952711",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 16 10:02:34 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Larus:2011:PC,
author = "James R. Larus",
title = "Programming the cloud",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "1--2",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941555",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Client + cloud computing is a disruptive, new
computing platform, combining diverse client devices
--- PCs, smartphones, sensors, and single-function and
embedded devices --- with the unlimited, on-demand
computation and data storage offered by cloud computing
services such as Amazon's AWS or Microsoft's Windows
Azure. As with every advance in computing, programming
is a fundamental challenge as client + cloud computing
combines many difficult aspects of software
development. Systems built for this world are
inherently parallel and distributed, run on unreliable
hardware, and must be continually available --- a
challenging programming model for even the most skilled
programmers. How then do ordinary programmers develop
software for the Cloud? This talk presents one answer,
Orleans, a software framework for building client +
cloud applications. Orleans encourages use of simple
concurrency patterns that are easy to understand and
implement correctly, building on an actor-like model
with declarative specification of persistence,
replication, and consistency and using lightweight
transactions to support the development of reliable and
scalable client + cloud software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hassaan:2011:OVU,
author = "Muhammad Amber Hassaan and Martin Burtscher and Keshav
Pingali",
title = "Ordered vs. unordered: a comparison of parallelism and
work-efficiency in irregular algorithms",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "3--12",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941557",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Outside of computational science, most problems are
formulated in terms of irregular data structures such
as graphs, trees and sets. Unfortunately, we understand
relatively little about the structure of parallelism
and locality in irregular algorithms. In this paper, we
study multiple algorithms for four such problems:
discrete-event simulation, single-source shortest path,
breadth-first search, and minimal spanning trees. We
show that the algorithms can be classified into two
categories that we call unordered and ordered, and
demonstrate experimentally that there is a trade-off
between parallelism and work efficiency: unordered
algorithms usually have more parallelism than their
ordered counterparts for the same problem, but they may
also perform more work. Nevertheless, our experimental
results show that unordered algorithms typically lead
to more scalable implementations, demonstrating that
less work-efficient irregular algorithms may be better
for parallel execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
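The ordered/unordered contrast drawn in Hassaan:2011:OVU is easy to make
concrete for single-source shortest paths, one of the four problems the
paper studies. The following minimal Python sketch (ours, over a toy
hypothetical graph, not the paper's code) contrasts an ordered worklist
(Dijkstra: work-efficient, but serialized by the priority queue) with an
unordered one (chaotic relaxation: any order is correct, so items can be
processed in parallel, at the cost of possible repeated work):

    import heapq

    # Toy directed graph: node -> list of (neighbor, edge weight).
    GRAPH = {0: [(1, 4), (2, 1)], 1: [(3, 1)], 2: [(1, 2), (3, 5)], 3: []}
    INF = float("inf")

    def sssp_ordered(src):
        # Ordered: the priority queue settles each node exactly once.
        dist = {v: INF for v in GRAPH}
        dist[src] = 0
        pq = [(0, src)]
        while pq:
            d, u = heapq.heappop(pq)
            if d > dist[u]:
                continue
            for v, w in GRAPH[u]:
                if d + w < dist[v]:
                    dist[v] = d + w
                    heapq.heappush(pq, (dist[v], v))
        return dist

    def sssp_unordered(src):
        # Unordered: any worklist order is correct, so elements could be
        # relaxed concurrently, but a node may be reprocessed.
        dist = {v: INF for v in GRAPH}
        dist[src] = 0
        work = [src]
        while work:
            u = work.pop()              # arbitrary order
            for v, w in GRAPH[u]:
                if dist[u] + w < dist[v]:
                    dist[v] = dist[u] + w
                    work.append(v)      # possible extra work
        return dist

    assert sssp_ordered(0) == sssp_unordered(0)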
@Article{Bauer:2011:PMH,
author = "Michael Bauer and John Clark and Eric Schkufza and
Alex Aiken",
title = "Programming the memory hierarchy revisited: supporting
irregular parallelism in {Sequoia}",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "13--24",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941558",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We describe two novel constructs for programming
parallel machines with multi-level memory hierarchies:
call-up, which allows a child task to invoke
computation on its parent, and spawn, which spawns a
dynamically determined number of parallel children
until some termination condition in the parent is met.
Together we show that these constructs allow
applications with irregular parallelism to be
programmed in a straightforward manner, and furthermore
these constructs complement and can be combined with
constructs for expressing regular parallelism. We have
implemented spawn and call-up in Sequoia and we present
an experimental evaluation on a number of irregular
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Murarasu:2011:CDS,
author = "Alin Murarasu and Josef Weidendorfer and Gerrit Buse
and Daniel Butnaru and Dirk Pfl{\"u}ger",
title = "Compact data structure and scalable algorithms for the
sparse grid technique",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "25--34",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941559",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "The sparse grid discretization technique enables a
compressed representation of higher-dimensional
functions. In its original form, it relies heavily on
recursion and complex data structures, thus being far
from well-suited for GPUs. In this paper, we describe
optimizations that enable us to implement compression
and decompression, the crucial sparse grid algorithms
for our application, on Nvidia GPUs. The main idea
consists of a bijective mapping between the set of
points in a multi-dimensional sparse grid and a set of
consecutive natural numbers. The resulting data
structure consumes a minimum amount of memory. For a
10-dimensional sparse grid with approximately 127
million points, it consumes up to 30 times less memory
than trees or hash tables which are typically used.
Compared to a sequential CPU implementation, the
speedups achieved on GPU are up to 17 for compression
and up to 70 for decompression. We show
that the optimizations are also applicable to multicore
CPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
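The core of Murarasu:2011:CDS is a bijection from sparse-grid points to
consecutive integers, so that values live in a flat array instead of
trees or hash tables. The sketch below (ours, deliberately simplified: a
dictionary stands in for the paper's closed-form bijection, and the
per-level position count is schematic) shows the storage idea:

    from itertools import product

    def sparse_levels(dim, n):
        # Level multi-indices l with sum(l) <= n: the subgrids that make
        # up a sparse grid of level n (one common convention).
        return [l for l in product(range(n + 1), repeat=dim)
                if sum(l) <= n]

    def build_index(dim, n):
        # Give each (level, position) point a consecutive integer rank.
        # Real sparse-grid codes compute the rank with a closed-form
        # bijection; the dictionary here merely stands in for it.
        rank = {}
        for l in sparse_levels(dim, n):
            for pos in product(*(range(2 ** li) for li in l)):
                rank[(l, pos)] = len(rank)
        return rank

    idx = build_index(dim=2, n=3)
    values = [0.0] * len(idx)               # flat array: minimal memory
    values[idx[((1, 2), (0, 3))]] = 4.2     # O(1) access via the rank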
@Article{Chafi:2011:DSA,
author = "Hassan Chafi and Arvind K. Sujeeth and Kevin J. Brown
and HyoukJoong Lee and Anand R. Atreya and Kunle
Olukotun",
title = "A domain-specific approach to heterogeneous
parallelism",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "35--46",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941561",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Exploiting heterogeneous parallel hardware currently
requires mapping application code to multiple disparate
programming models. Unfortunately, general-purpose
programming models available today can yield high
performance but are too low-level to be accessible to
the average programmer. We propose leveraging
domain-specific languages (DSLs) to map high-level
application code to heterogeneous devices. To
demonstrate the potential of this approach we present
OptiML, a DSL for machine learning. OptiML programs are
implicitly parallel and can achieve high performance on
heterogeneous hardware with no modification required to
the source code. For such a DSL-based approach to be
tractable at large scales, better tools are required
for DSL authors to simplify language creation and
parallelization. To address this concern, we introduce
Delite, a system designed specifically for DSLs that is
both a framework for creating an implicitly parallel
DSL as well as a dynamic runtime providing automated
targeting to heterogeneous parallel hardware. We show
that OptiML running on Delite achieves single-threaded,
parallel, and GPU performance superior to explicitly
parallelized MATLAB code in nearly all cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Catanzaro:2011:CCE,
author = "Bryan Catanzaro and Michael Garland and Kurt Keutzer",
title = "{Copperhead}: compiling an embedded data parallel
language",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "47--56",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941562",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Modern parallel microprocessors deliver high
performance on applications that expose substantial
fine-grained data parallelism. Although data
parallelism is widely available in many computations,
implementing data parallel algorithms in low-level
languages is often an unnecessarily difficult task. The
characteristics of parallel microprocessors and the
limitations of current programming methodologies
motivate our design of Copperhead, a high-level data
parallel language embedded in Python. The Copperhead
programmer describes parallel computations via
composition of familiar data parallel primitives
supporting both flat and nested data parallel
computation on arrays of data. Copperhead programs are
expressed in a subset of the widely used Python
programming language and interoperate with standard
Python modules, including libraries for numeric
computation, data visualization, and analysis. In this
paper, we discuss the language, compiler, and runtime
features that enable Copperhead to efficiently execute
data parallel code. We define the restricted subset of
Python which Copperhead supports and introduce the
program analysis techniques necessary for compiling
Copperhead code into efficient low-level
implementations. We also outline the runtime support by
which Copperhead programs interoperate with standard
Python modules. We demonstrate the effectiveness of our
techniques with several examples targeting the CUDA
platform for parallel programming on GPUs. Copperhead
code is concise, on average requiring 3.6 times fewer
lines of code than CUDA, and the compiler generates
efficient code, yielding 45-100\% of the performance of
hand-crafted, well optimized CUDA code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
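Copperhead programs, as described in Catanzaro:2011:CCE, are compositions
of data-parallel primitives written in a restricted Python subset. The
following plain-Python sketch (ours; it runs sequentially, whereas
Copperhead would compile such nested map/reduce compositions to CUDA)
shows the style using a sparse matrix-vector product:

    from functools import reduce

    def spmv_csr(vals, cols, rowptr, x):
        # Nested data parallelism: an outer map over rows, and an inner
        # map plus reduce over the nonzeros of each row.
        def row(i):
            products = [vals[j] * x[cols[j]]
                        for j in range(rowptr[i], rowptr[i + 1])]
            return reduce(lambda a, b: a + b, products, 0.0)
        return [row(i) for i in range(len(rowptr) - 1)]

    # 2x2 matrix [[2, 0], [1, 3]] in CSR form.
    vals, cols, rowptr = [2.0, 1.0, 3.0], [0, 0, 1], [0, 1, 3]
    assert spmv_csr(vals, cols, rowptr, [1.0, 2.0]) == [2.0, 7.0]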
@Article{Jenista:2011:OSO,
author = "James Christopher Jenista and Yong hun Eom and Brian
Charles Demsky",
title = "{OoOJava}: software out-of-order execution",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "57--68",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941563",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Developing parallel software using current tools can
be challenging. Even experts find it difficult to
reason about the use of locks and often accidentally
introduce race conditions and deadlocks into parallel
software. OoOJava is a compiler-assisted approach that
leverages developer annotations along with static
analysis to provide an easy-to-use deterministic
parallel programming model. OoOJava extends Java with a
task annotation that instructs the compiler to consider
a code block for out-of-order execution. OoOJava
executes tasks as soon as their data dependences are
resolved and guarantees that the execution of an
annotated program preserves the exact semantics of the
original sequential program. We have implemented
OoOJava and achieved an average speedup of 16.6x on our
ten benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
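OoOJava extends Java, so the following Python sketch is only a loose
analogy (ours, not the paper's mechanism): futures start tasks as soon as
the values they depend on resolve, while the dataflow preserves the
sequential program's result, which is the execution discipline the
abstract describes:

    from concurrent.futures import ThreadPoolExecutor

    def parse(blob):
        return blob.split()

    def count(words):
        return len(words)

    with ThreadPoolExecutor() as pool:
        f1 = pool.submit(parse, "a b c")   # no dependences: runs now
        f2 = pool.submit(parse, "d e")     # independent: may overlap f1
        # This task depends on both results, so it waits only for them,
        # yet the final answer matches the sequential program.
        f3 = pool.submit(lambda: count(f1.result() + f2.result()))
        assert f3.result() == 5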
@Article{Feng:2011:SSP,
author = "Min Feng and Rajiv Gupta and Yi Hu",
title = "{SpiceC}: scalable parallelism via implicit copying
and explicit commit",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "69--80",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941564",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "In this paper we present an approach to parallel
programming called SpiceC. SpiceC simplifies the task
of parallel programming through a combination of an
intuitive computation model and SpiceC directives. The
SpiceC parallel computation model consists of multiple
threads where every thread has a private space for data
and all threads share data via a shared space. Each
thread performs computations using its private space
thus offering isolation which allows for speculative
computations. SpiceC provides easy-to-use compiler
directives with which programmers can express
different forms of parallelism. It allows
developers to express high level constraints on data
transfers between spaces while the tedious task of
generating the code for the data transfers is performed
by the compiler. SpiceC also supports data transfers
involving dynamic data structures without help from
developers. SpiceC allows developers to create clusters
of data to enable parallel data transfers. SpiceC
programs are portable across modern chip multiprocessor
based machines that may or may not support cache
coherence. We have developed implementations of SpiceC
for shared memory systems with and without cache
coherence. We evaluate our implementation using seven
benchmarks of which four are parallelized
speculatively. Our compiler-generated implementations
achieve speedups ranging from 2x to 18x on a 24 core
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
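A simplified stand-in (ours, not the SpiceC implementation) for the
private-space/shared-space model of Feng:2011:SSP: each thread computes
in isolation on private data, then makes its results visible in one
explicit commit step:

    import threading

    shared = {"hist": [0] * 4}
    lock = threading.Lock()

    def worker(data):
        private = [0] * 4                # thread-private space
        for x in data:
            private[x % 4] += 1          # isolated computation
        with lock:                       # explicit commit to shared space
            for i, c in enumerate(private):
                shared["hist"][i] += c

    threads = [threading.Thread(target=worker, args=([1, 2, 3],)),
               threading.Thread(target=worker, args=([0, 1],))]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert sum(shared["hist"]) == 5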
@Article{Negara:2011:IOT,
author = "Stas Negara and Rajesh K. Karmani and Gul Agha",
title = "Inferring ownership transfer for efficient message
passing",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "81--90",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941566",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "One of the more popular paradigms for concurrent
programming is the Actor model of message passing; it
has been adopted in one form or another by a number of
languages and frameworks. By avoiding a shared local
state and instead relying on message passing, the Actor
model facilitates modular programming. An important
challenge for message passing languages is to transmit
messages efficiently. This requires retaining the
pass-by-value semantics of messages while avoiding
making a deep copy on sequential or shared memory
multicore processors. A key observation is that many
messages have an ownership transfer semantics; such
messages can be sent efficiently using pointers without
introducing shared state between concurrent objects. We
propose a conservative static analysis algorithm which
infers if the content of a message is compatible with
an ownership transfer semantics. Our tool, called SOTER
(for Safe Ownership Transfer enablER) transforms the
program to avoid the cost of copying the contents of a
message whenever it can infer the content obeys the
ownership transfer semantics. Experiments using a range
of programs suggest that our conservative static
analysis method is usually able to infer ownership
transfer. Performance results demonstrate that the
transformed programs execute up to an order of
magnitude faster than the original programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
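The ownership-transfer observation in Negara:2011:IOT can be illustrated
with a toy Python mailbox (ours): when the sender provably never touches
a message again, sending a reference preserves pass-by-value semantics
without the deep copy:

    import copy
    from queue import Queue

    mailbox = Queue()

    def send_by_copy(msg):
        mailbox.put(copy.deepcopy(msg))   # always safe, always expensive

    def send_by_transfer(msg):
        mailbox.put(msg)                  # safe only if, as SOTER infers,
                                          # the sender never uses msg again

    big = list(range(1_000_000))
    send_by_transfer(big)
    big = None                            # sender gives up its reference
    received = mailbox.get()
    assert len(received) == 1_000_000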
@Article{Xiang:2011:AWP,
author = "Xiaoya Xiang and Bin Bao and Tongxin Bai and Chen Ding
and Trishul Chilimbi",
title = "All-window profiling and composable models of cache
sharing",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "91--102",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941567",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "As multi-core processors become commonplace and cloud
computing is gaining acceptance, more applications are
run in a shared cache environment. Cache sharing
depends on a concept called footprint, which depends on
all cache accesses, not just cache misses. Previous work
has recognized the importance of footprint but has not
provided a method for accurate measurement, mainly
because the complete measurement requires counting data
access in all execution windows, which takes time
quadratic in the length of a trace. The paper first
presents an algorithm efficient enough for off-line use
to approximately measure the footprint with a
guaranteed precision. The cost of the analysis can be
adjusted by changing the precision. Then the paper
presents a composable model. For a set of programs, the
model uses the all-window footprint of each program to
predict its cache interference with other programs
without running these programs together. The paper
evaluates the efficiency of all-window profiling using
the SPEC 2000 benchmarks and compares the footprint
interference model with a miss-rate based model and
with exhaustive testing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
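For reference, the quantity discussed in Xiang:2011:AWP can be written
down naively: the footprint of a window is the number of distinct data
items accessed in it, and all-window profiling averages this over every
window, which is exactly the quadratic cost the paper's approximation
avoids. A small Python sketch (ours):

    def avg_footprint(trace, window):
        # Footprint of one window = number of distinct items accessed in
        # it; averaging over all windows, done naively, is quadratic.
        counts = [len(set(trace[i:i + window]))
                  for i in range(len(trace) - window + 1)]
        return sum(counts) / len(counts)

    trace = ["a", "b", "a", "c", "b", "a"]
    assert avg_footprint(trace, 3) == 2.75    # windows: 2, 3, 3, 3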
@Article{Ding:2011:UUL,
author = "Xiaoning Ding and Kaibo Wang and Xiaodong Zhang",
title = "{ULCC}: a user-level facility for optimizing shared
cache performance on multicores",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "103--112",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941568",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Scientific applications face serious performance
challenges on multicore processors, one of which is
caused by access contention in last level shared caches
from multiple running threads. The contention increases
the number of long latency memory accesses, and
consequently increases application execution times.
Optimizing shared cache performance is critical to
significantly reducing the execution times of multi-threaded
programs on multicores. However, there are two unique
problems to be solved before implementing cache
optimization techniques on multicores at the user
level. First, available cache space for each running
thread in a last level cache is difficult to predict
due to access contention in the shared space, which
makes cache conscious algorithms for single cores
ineffective on multicores. Second, at the user level,
programmers are not able to allocate cache space at
will to running threads in the shared cache, thus data
sets with strong locality may not be allocated with
sufficient cache space, and cache pollution can easily
happen. To address these two critical issues, we have
designed ULCC (User Level Cache Control), a software
runtime library that enables programmers to explicitly
manage and optimize last level cache usage by
allocating proper cache space for different data sets
of different threads. We have implemented ULCC at the
user level based on a page-coloring technique for last
level cache usage management. By means of multiple case
studies on an Intel multicore processor, we show that
with ULCC, scientific applications can achieve
significant performance improvements by fully
exploiting the benefit of cache optimization algorithms
and by partitioning the cache space accordingly to
protect frequently reused data sets and to avoid cache
pollution. Our experiments with various applications
show that ULCC can significantly improve application
performance by nearly 40\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
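The page-coloring technique underlying ULCC rests on simple
cache-indexing arithmetic. The sketch below (ours, with assumed typical
cache parameters, not figures from the paper) shows how the number of
colors falls out and how a physical address maps to a color:

    CACHE_BYTES = 8 * 1024 * 1024   # 8 MiB last-level cache (assumed)
    WAYS        = 16                # associativity (assumed)
    LINE        = 64                # bytes per cache line
    PAGE        = 4096              # bytes per page

    sets_total  = CACHE_BYTES // (WAYS * LINE)   # 8192 cache sets
    sets_per_pg = PAGE // LINE                   # 64 sets per page
    num_colors  = sets_total // sets_per_pg      # 128 colors

    def color(phys_addr):
        # The color comes from the page-number bits that also index the
        # cache set, so a runtime can steer allocations by color and
        # thereby partition the shared cache among threads.
        return (phys_addr // PAGE) % num_colors

    assert num_colors == 128 and color(0) == 0 and color(PAGE) == 1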
@Article{Wu:2011:STB,
author = "Xing Wu and Frank Mueller",
title = "{ScalaExtrap}: trace-based communication extrapolation
for {SPMD} programs",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "113--122",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Performance modeling for scientific applications is
important for assessing potential application
performance and systems procurement in high-performance
computing (HPC). Recent progress on communication
tracing opens up novel opportunities for communication
modeling due to its lossless yet scalable trace
collection. Estimating the impact of scaling on
communication efficiency still remains non-trivial due
to execution-time variations and exposure to hardware
and software artifacts. This work contributes a
fundamentally novel modeling scheme. We synthetically
generate the application trace for large numbers of
nodes by extrapolation from a set of smaller traces. We
devise an innovative approach for topology
extrapolation of single program, multiple data (SPMD)
codes with stencil or mesh communication. The
extrapolated trace can subsequently be (a) replayed to
assess communication requirements before porting an
application, (b) transformed to auto-generate
communication benchmarks for various target platforms,
and (c) analyzed to detect communication inefficiencies
and scalability limitations. To the best of our
knowledge, rapidly obtaining the communication behavior
of parallel applications at arbitrary scale with the
availability of timed replay, yet without actual
execution of the application at this scale is without
precedent and has the potential to enable otherwise
infeasible system simulation at the exascale level.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{McKinley:2011:HPC,
author = "Kathryn S. McKinley",
title = "How's the parallel computing revolution going?",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "123--124",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941571",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Two trends changed the computing landscape over the
past decade: (1) hardware vendors started delivering
chip multiprocessors (CMPs) instead of uniprocessors,
and (2) software developers increasingly chose managed
languages instead of native languages. Unfortunately,
the former change is disrupting the virtuous-cycle
between performance improvements and software
innovation. Establishing a new parallel performance
virtuous cycle for managed languages will require
scalable applications executing on scalable Virtual
Machine (VM) services, since the VM schedules,
monitors, compiles, optimizes, garbage collects, and
executes together with the application. This talk
describes current progress, opportunities, and
challenges for scalable VM services. The parallel
computing revolution urgently needs more innovations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Karmani:2011:TCS,
author = "Rajesh K. Karmani and P. Madhusudan and Brandon M.
Moore",
title = "Thread contracts for safe parallelism",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "125--134",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941573",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We build a framework of thread contracts, called
Accord, that allows programmers to annotate their
concurrency co-ordination strategies. Accord
annotations allow programmers to declaratively specify
the parts of memory that a thread may read or write
into, and the locks that protect them, reflecting the
concurrency co-ordination among threads and the reason
why the program is free of data-races. We provide
automatic tools to check if the concurrency
co-ordination strategy ensures race-freedom, using
constraint-solvers (SMT solvers). Hence programmers
using Accord can both formally state and prove their
co-ordination strategies ensure race freedom. The
programmer's implementation of the co-ordination
strategy may however be correct or incorrect. We show
how the formal Accord contracts allow us to
automatically insert runtime assertions that serve to
check, during testing, whether the implementation
conforms to the contract. Using a large class of
data-parallel programs that share memory in intricate
ways, we show that natural and simple contracts suffice
to document the co-ordination strategy amongst threads,
and that the task of showing that the strategy ensures
race-freedom can be handled efficiently and
automatically by an existing SMT solver (Z3). While
co-ordination strategies can be proved race-free in our
framework, failure to prove the co-ordination strategy
race-free, accompanied by counter-examples produced by
the solver, indicates the presence of races. Using such
counterexamples, we report hitherto undiscovered
data-races that we found in the long-tested {\tt
applu\_l} benchmark in the Spec OMP2001 suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
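Accord discharges its race-freedom queries with an SMT solver (Z3). As a
solver-free stand-in (ours), the following Python check captures the
flavor of such a query for a blocked loop: each thread declares the index
region it may write, and write/write races are impossible when no two
declared regions can overlap:

    def regions_overlap(a, b):
        (lo1, hi1), (lo2, hi2) = a, b        # inclusive index intervals
        return lo1 <= hi2 and lo2 <= hi1

    def declared_region(t, chunk):
        # Thread t declares it writes only the block it owns.
        return (t * chunk, (t + 1) * chunk - 1)

    T, chunk = 4, 256
    regions = [declared_region(t, chunk) for t in range(T)]
    races = [(i, j) for i in range(T) for j in range(i + 1, T)
             if regions_overlap(regions[i], regions[j])]
    assert races == []                       # disjoint blocks: race-free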
@Article{Zheng:2011:GLO,
author = "Mai Zheng and Vignesh T. Ravi and Feng Qin and Gagan
Agrawal",
title = "{GRace}: a low-overhead mechanism for detecting data
races in {GPU} programs",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "135--146",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941574",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "In recent years, GPUs have emerged as an extremely
cost-effective means for achieving high performance.
Many application developers, including those with no
prior parallel programming experience, are now trying
to scale their applications using GPUs. While languages
like CUDA and OpenCL have eased GPU programming for
non-graphical applications, they are still explicitly
parallel languages. All parallel programmers,
particularly the novices, need tools that can help
ensure the correctness of their programs. Like any
multithreaded environment, data races on GPUs can
severely affect the program reliability. Thus, tool
support for detecting race conditions can significantly
benefit GPU application developers. Existing approaches
for detecting data races on CPUs or GPUs have one or
more of the following limitations: (1) being ill-suited
for handling non-lock synchronization primitives on
GPUs; (2) lack of scalability due to the state
explosion problem; (3) reporting many false positives
because of simplified modeling; and/or (4) incurring
prohibitive runtime and space overhead. In this paper,
we propose GRace, a new mechanism for detecting races
in GPU programs that combines static analysis with a
carefully designed dynamic checker for logging and
analyzing information at runtime. Our design utilizes
the GPU's memory hierarchy to log runtime data accesses
efficiently. To improve the performance, GRace
leverages static analysis to reduce the number of
statements that need to be instrumented. Additionally,
by exploiting the knowledge of thread scheduling and
the execution model in the underlying GPUs, GRace can
accurately detect data races with no false positives
reported. Based on the above idea, we have built a
prototype of GRace with two schemes, i.e., GRace-stmt
and GRace-addr, for NVIDIA GPUs. Both schemes are
integrated with the same static analysis. We have
evaluated GRace-stmt and GRace-addr with three data
race bugs in three GPU kernel functions and also have
compared them with the existing approach, referred to
as B-tool. Our experimental results show that both
schemes of GRace are effective in detecting all
evaluated cases with no false positives, whereas B-tool
reports many false positives for one evaluated case. On
the one hand, GRace-addr incurs low runtime overhead,
i.e., 22-116\%, and low space overhead, i.e., 9-18MB,
for the evaluated kernels. On the other hand,
GRace-stmt offers more help in diagnosing data races
with larger overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
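In the spirit of the dynamic half of GRace, though vastly simplified
(ours, CPU-side, and ignoring synchronization, which GRace handles): log
each access with its thread and access type, then flag accesses to the
same address from different threads where at least one is a write:

    log = []  # (thread_id, address, is_write)

    def record(tid, addr, is_write):
        log.append((tid, addr, is_write))

    def races():
        # Conflicting pairs: same address, different threads, and at
        # least one write. No happens-before filtering is attempted.
        return {(a, b) for a in log for b in log
                if a[1] == b[1] and a[0] != b[0] and (a[2] or b[2])}

    record(0, 0x10, True)     # thread 0 writes address 0x10
    record(1, 0x10, False)    # thread 1 reads the same address: conflict
    record(1, 0x20, True)     # different address: no conflict
    assert len(races()) == 2  # the (w, r) pair is found in both orders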
@Article{Yi:2011:CRP,
author = "Jaeheon Yi and Caitlin Sadowski and Cormac Flanagan",
title = "Cooperative reasoning for preemptive execution",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "147--156",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941575",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We propose a cooperative methodology for multithreaded
software, where threads use traditional synchronization
idioms such as locks, but additionally document each
point of potential thread interference with a ``yield''
annotation. Under this methodology, code between two
successive yield annotations forms a serializable
transaction that is amenable to sequential reasoning.
This methodology reduces the burden of reasoning about
thread interleavings by indicating only those
interference points that matter. We present
experimental results showing that very few yield
annotations are required, typically one or two per
thousand lines of code. We also present dynamic
analysis algorithms for detecting cooperability
violations, where thread interference is not documented
by a yield, and for yield annotation inference for
legacy software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lesani:2011:CMT,
author = "Mohsen Lesani and Jens Palsberg",
title = "Communicating memory transactions",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "157--168",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941577",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Many concurrent programming models enable both
transactional memory and message passing. For such
models, researchers have built increasingly efficient
implementations and defined reasonable correctness
criteria, while it remains an open problem to obtain
the best of both worlds. We present a programming model
that is the first to have opaque transactions, safe
asynchronous message passing, and an efficient
implementation. Our semantics uses tentative message
passing and keeps track of dependencies to enable undo
of message passing in case a transaction aborts. We can
program communication idioms such as barrier and
rendezvous that do not deadlock when used in an atomic
block. Our experiments show that our model adds little
overhead to pure transactions, and that it is
significantly more efficient than Transactional Events.
We use a novel definition of safe message passing that
may be of independent interest.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Luchangco:2011:TCE,
author = "Victor Luchangco and Virendra J. Marathe",
title = "Transaction communicators: enabling cooperation among
concurrent transactions",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "169--178",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941578",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "In this paper, we propose to extend transactional
memory with transaction communicators, special objects
through which concurrent transactions can communicate:
changes by one transaction to a communicator can be
seen by concurrent transactions before the first
transaction commits. Although isolation of transactions
is compromised by such communication, we constrain the
effects of this compromise by tracking dependencies
among transactions, and preventing any transaction from
committing unless every transaction whose changes it
saw also commits. In particular, mutually dependent
transactions must commit or abort together, and
transactions that do not communicate remain isolated.
To help programmers synchronize accesses to
communicators, we also provide special
communicator-isolating transactions, which ensure
isolation even for accesses to communicators. We
propose language features to help programmers express
the communicator constructs. We implemented a novel
communicators-enabled STM runtime in the Maxine VM. Our
preliminary evaluation demonstrates that communicators
can be used in diverse settings to improve the
performance of transactional programs, and to empower
programmers with the ability to safely express within
transactions important programming idioms that
fundamentally require compromise of transaction
isolation (e.g., CSP-style synchronous
communication).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Fernandes:2011:LFS,
author = "S{\'e}rgio Miguel Fernandes and Jo{\~a}o Cachopo",
title = "Lock-free and scalable multi-version software
transactional memory",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "179--188",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941579",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Software Transactional Memory (STM) was initially
proposed as a lock-free mechanism for concurrency
control. Early implementations had efficiency
limitations, and soon obstruction-free proposals
appeared, to tackle this problem, often simplifying STM
implementation. Today, most of the modern and
top-performing STMs use blocking designs, relying on
locks to ensure an atomic commit operation. This
approach has proven better in practice, in part due
to its simplicity. Yet, it may have scalability
problems when we move into many-core computers,
requiring fine-tuning and careful programming to avoid
contention. In this paper we present and discuss the
modifications we made to a lock-based multi-version STM
in Java, to turn it into a lock-free implementation
that we have tested to scale at least up to 192 cores,
and which provides results that compete with, and
sometimes exceed, some of today's top-performing
lock-based implementations. The new lock-free commit
algorithm allows write transactions to proceed in
parallel, by allowing them to run their validation
phase independently of each other, and by resorting to
helping from threads that would otherwise be waiting to
commit, during the write-back phase. We also present a
new garbage collection algorithm to dispose of old
unused object versions that allows for asynchronous
identification of unnecessary versions, which minimizes
its interference with the rest of the transactional
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tian:2011:ESP,
author = "Chen Tian and Changhui Lin and Min Feng and Rajiv
Gupta",
title = "Enhanced speculative parallelization via incremental
recovery",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "189--200",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941580",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "The widespread availability of multicore systems has
led to an increased interest in speculative
parallelization of sequential programs using
software-based thread level speculation. Many of the
proposed techniques are implemented via state
separation where non-speculative computation state is
maintained separately from the speculative state of
threads performing speculative computations. If
speculation is successful, the results from speculative
state are committed to non-speculative state. However,
upon misspeculation, a discard-all scheme is employed in
which speculatively computed results of a thread are
discarded and the computation is performed again. While
this scheme is simple to implement, one disadvantage of
discard-all is its inability to tolerate high
misspeculation rates due to its high runtime overhead.
Thus, it is not suitable for use in applications where
misspeculation rates are input dependent and therefore
may reach high levels. In this paper we develop an
approach for incremental recovery in which, instead of
discarding all of the results and reexecuting the
speculative computation in its entirety, the
computation is restarted from the earliest point at
which a misspeculation causing value is read. This
approach has two advantages. First, the cost of
recovery is reduced as only part of the computation is
reexecuted. Second, since recovery takes less time, the
likelihood of future misspeculations is reduced. We
design and implement a strategy for implementing
incremental recovery that allows results of partial
computations to be efficiently saved and reused. For a
set of programs where misspeculation rate is input
dependent, our experiments show that with inputs that
result in misspeculation rates of around 40\% and 80\%,
applying incremental recovery technique results in
1.2x-3.3x and 2.0x-6.6x speedups respectively over the
discard-all recovery scheme. Furthermore,
misspeculations observed during discard-all scheme are
reduced when incremental recovery is employed ---
reductions range from 10\% to 85\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
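A minimal sketch (ours, not the paper's code) of the incremental-recovery
idea in Tian:2011:ESP: model the speculative computation as a list of
steps tagged with the value each reads; on misspeculation, rerun only
from the earliest step that read the bad value, reusing the saved partial
results that precede it:

    INPUTS = {"x": 1, "y": 10}

    STEPS = [                     # (result name, value read, function)
        ("a", "x", lambda env: INPUTS["x"] + 1),
        ("b", "y", lambda env: INPUTS["y"] * 2),
        ("c", "b", lambda env: env["a"] + env["b"]),
    ]

    def run(start, env):
        env = dict(env)
        for name, _, fn in STEPS[start:]:
            env[name] = fn(env)
        return env

    full = run(0, {})             # speculative run; partials are saved
    INPUTS["y"] = 20              # misspeculation: "y" was wrong
    first_bad = min(i for i, (_, read, _) in enumerate(STEPS)
                    if read == "y")           # earliest read of "y"
    keep = [s[0] for s in STEPS[:first_bad]]
    partial = {k: v for k, v in full.items() if k in keep}
    fixed = run(first_bad, partial)   # rerun steps 1..2 only, not step 0
    assert fixed == {"a": 2, "b": 40, "c": 42}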
@Article{Saraswat:2011:LBG,
author = "Vijay A. Saraswat and Prabhanjan Kambadur and Sreedhar
Kodali and David Grove and Sriram Krishnamoorthy",
title = "Lifeline-based global load balancing",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "201--212",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941582",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "On shared-memory systems, Cilk-style work-stealing
has been used to effectively parallelize irregular
task-graph based applications such as Unbalanced Tree
Search (UTS). There are two main difficulties in
extending this approach to distributed memory. In the
shared memory approach, thieves (nodes without work)
constantly attempt to asynchronously steal work from
randomly chosen victims until they find work. In
distributed memory, thieves cannot autonomously steal
work from a victim without disrupting its execution.
When work is sparse, this results in performance
degradation. In essence, a direct extension of
traditional work-stealing to distributed memory
violates the work-first principle underlying
work-stealing. Further, thieves spend useless CPU
cycles attacking victims that have no work, resulting
in system inefficiencies in multi-programmed contexts.
Second, it is non-trivial to detect active distributed
termination (detect that programs at all nodes are
looking for work, hence there is no work). This problem
is well-studied and requires careful design for good
performance. Unfortunately, in most existing
languages/frameworks, application developers are forced
to implement their own distributed termination
detection. In this paper, we develop a simple set of
ideas that allow work-stealing to be efficiently
extended to distributed memory. First, we introduce
lifeline graphs: low-degree, low-diameter, fully
connected directed graphs. Such graphs can be
constructed from k-dimensional hypercubes. When a node
is unable to find work after w unsuccessful steals, it
quiesces after informing the outgoing edges in its
lifeline graph. Quiescent nodes do not disturb other
nodes. A quiesced node is reactivated when work arrives
from a lifeline and itself shares this work with those
of its incoming lifelines that are activated.
Termination occurs precisely when computation at all
nodes has quiesced. In a language such as X10, such
passive distributed termination can be detected
automatically using the finish construct --- no
application code is necessary. Our design is
implemented in a few hundred lines of X10. On the
binomial tree described in [Olivier:08], the program
achieves 87\% efficiency on an Infiniband cluster of
1024 Power7 cores, with a peak throughput of 2.37
GNodes/s. It achieves 87\% efficiency on a Blue
Gene/P with 2048 processors, and a peak throughput of
0.966 GNodes/s. All numbers are relative to single core
sequential performance. This implementation has been
refactored into a reusable global load balancing
framework. Applications can use this framework to
obtain global load balance with minimal code changes.
In summary, we claim: (a) the first formulation of UTS
that does not involve application level global
termination detection, (b) the introduction of lifeline
graphs to reduce failed steals, (c) the demonstration of
simple lifeline graphs based on k-hypercubes, (d)
performance with superior efficiency (or the same
efficiency but over a wider range) than published
results on UTS. In particular, our framework can
deliver the same or better performance as an
unrestricted random work-stealing implementation, while
reducing the number of attempted steals.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
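One concrete lifeline construction consistent with the description above
(our paraphrase of the hypercube idea; the digit/base choices are
illustrative, not taken from the paper's code): write each node id in
base b with z digits, and direct a lifeline to each node obtained by
incrementing one digit modulo b, giving out-degree at most z and low
diameter:

    def digits(n, base, z):
        ds = []
        for _ in range(z):          # little-endian digit expansion
            ds.append(n % base)
            n //= base
        return ds

    def undigits(ds, base):
        n = 0
        for d in reversed(ds):      # invert the expansion above
            n = n * base + d
        return n

    def lifelines(node, base, z, P):
        out = []
        ds = digits(node, base, z)
        for i in range(z):
            nd = list(ds)
            nd[i] = (nd[i] + 1) % base   # bump one digit cyclically
            peer = undigits(nd, base)
            if peer != node and peer < P:
                out.append(peer)
        return out

    # 2-digit base-3 cube over 9 nodes: each node has at most 2 lifelines.
    assert lifelines(0, base=3, z=2, P=9) == [1, 3]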
@Article{Wang:2011:CSP,
author = "Zhaoguo Wang and Ran Liu and Yufei Chen and Xi Wu and
Haibo Chen and Weihua Zhang and Binyu Zang",
title = "{COREMU}: a scalable and portable parallel full-system
emulator",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "213--222",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941583",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "This paper presents the open-source COREMU, a scalable
and portable parallel emulation framework that
decouples the complexity of parallelizing full-system
emulators from building a mature sequential one. The
key observation is that CPU cores and devices in
current (and likely future) multiprocessors are
loosely-coupled and communicate through well-defined
interfaces. Based on this observation, COREMU emulates
multiple cores by creating multiple instances of
existing sequential emulators, and uses a thin library
layer to handle the inter-core and device communication
and synchronization, to maintain a consistent view of
system resources. COREMU also incorporates lightweight
memory transactions, feedback-directed scheduling, lazy
code invalidation and adaptive signal control to
provide scalable performance. To make COREMU useful in
practice, we also provide some preliminary tools and
APIs that can help programmers to diagnose performance
problems and (concurrency) bugs. A working prototype,
which reuses the widely-used QEMU as the sequential
emulator, was built with only 2500 lines of code (LOC)
changed in QEMU. It currently supports x64 and ARM
platforms, and can emulate up to 255 cores running
commodity OSes with practical performance, while QEMU
cannot scale above 32 cores. A set of performance
evaluation against QEMU indicates that, COREMU has
negligible uniprocessor emulation overhead, performs
and scales significantly better than QEMU. We also show
how COREMU could be used to diagnose performance
problems and concurrency bugs of both OS kernel and
parallel applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kogan:2011:WFQ,
author = "Alex Kogan and Erez Petrank",
title = "Wait-free queues with multiple enqueuers and
dequeuers",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "223--234",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941585",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "The queue data structure is fundamental and
ubiquitous. Lock-free versions of the queue are well
known. However, an important open question is whether
practical wait-free queues exist. Until now, only
versions with limited concurrency were proposed. In
this paper we provide a design for a practical
wait-free queue. Our construction is based on the
highly efficient lock-free queue of Michael and Scott.
To achieve wait-freedom, we employ a priority-based
helping scheme in which faster threads help the slower
peers to complete their pending operations. We have
implemented our scheme on multicore machines and
present performance measurements comparing our
implementation with that of Michael and Scott in
several system configurations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tanase:2011:SPC,
author = "Gabriel Tanase and Antal Buss and Adam Fidel and
Harshvardhan Harshvardhan and Ioannis Papadopoulos and
Olga Pearce and Timmie Smith and Nathan Thomas and
Xiabing Xu and Nedal Mourad and Jeremy Vu and Mauro
Bianco and Nancy M. Amato and Lawrence Rauchwerger",
title = "The {STAPL} parallel container framework",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "235--246",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941586",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "The Standard Template Adaptive Parallel Library
(STAPL) is a parallel programming infrastructure that
extends C++ with support for parallelism. It includes a
collection of distributed data structures called
pContainers that are thread-safe, concurrent objects,
i.e., shared objects that provide parallel methods that
can be invoked concurrently. In this work, we present
the STAPL Parallel Container Framework (PCF), that is
designed to facilitate the development of generic
parallel containers. We introduce a set of concepts and
a methodology for assembling a pContainer from existing
sequential or parallel containers, without requiring
the programmer to deal with concurrency or data
distribution issues. The PCF provides a large number of
basic parallel data structures (e.g., pArray, pList,
pVector, pMatrix, pGraph, pMap, pSet). The PCF provides
a class hierarchy and a composition mechanism that
allows users to extend and customize the current
container base for improved application expressivity
and performance. We evaluate STAPL pContainer
performance on a CRAY XT4 massively parallel system and
show that pContainer methods, generic pAlgorithms, and
different applications provide good scalability on more
than 16,000 processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kourtis:2011:CEC,
author = "Kornilios Kourtis and Vasileios Karakasis and Georgios
Goumas and Nectarios Koziris",
title = "{CSX}: an extended compression format for {SpMV} on
shared memory systems",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "247--256",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941587",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "The Sparse Matrix-Vector multiplication (SpMV) kernel
scales poorly on shared memory systems with multiple
processing units due to the streaming nature of its
data access pattern. Previous research has demonstrated
that an effective strategy to improve the kernel's
performance is to drastically reduce the data volume
involved in the computations. Since the storage formats
for sparse matrices include metadata describing the
structure of non-zero elements within the matrix, we
propose a generalized approach to compress metadata by
exploiting substructures within the matrix. We call the
proposed storage format Compressed Sparse eXtended
(CSX). In our implementation we employ runtime code
generation to construct specialized SpMV routines for
each matrix. Experimental evaluation on two shared
memory systems for 15 sparse matrices demonstrates
significant performance gains as the number of
participating cores increases. Regarding the cost of
CSX construction, we propose several strategies that
trade performance for preprocessing cost, making CSX
applicable to both online and offline preprocessing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
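For reference, the baseline such work starts from is the plain CSR kernel
below; the row_ptr/col_idx arrays streamed here are exactly the metadata
that CSX compresses by detecting substructures and generating specialized
code per matrix (a generic sketch, not the paper's implementation):

    #include <cstddef>
    #include <vector>

    struct CSR {
        std::vector<double> val;      // non-zero values
        std::vector<int>    col_idx;  // column of each non-zero (metadata)
        std::vector<int>    row_ptr;  // index of first non-zero of each row
    };

    // y = A * x for a CSR matrix: one metadata read per multiply-add,
    // which is why shrinking the metadata stream helps a bandwidth-bound
    // kernel.
    void spmv(const CSR& a, const std::vector<double>& x,
              std::vector<double>& y) {
        for (std::size_t i = 0; i + 1 < a.row_ptr.size(); ++i) {
            double sum = 0.0;
            for (int k = a.row_ptr[i]; k < a.row_ptr[i + 1]; ++k)
                sum += a.val[k] * x[a.col_idx[k]];
            y[i] = sum;
        }
    }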
@Article{Dotsenko:2011:ATF,
author = "Yuri Dotsenko and Sara S. Baghsorkhi and Brandon Lloyd
and Naga K. Govindaraju",
title = "Auto-tuning of {Fast Fourier Transform} on graphics
processors",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "257--266",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941589",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We present an auto-tuning framework for FFTs on
graphics processors (GPUs). Due to complex design of
the memory and compute subsystems on GPUs, the
performance of FFT kernels over the range of possible
input parameters can vary widely. We generate several
variants for each component of the FFT kernel that, for
different cases, are likely to perform well. Our
auto-tuner composes variants to generate kernels and
selects the best ones. We present heuristics to prune
the search space and profile only a small fraction of
all possible kernels. We compose optimized kernels to
improve the performance of larger FFT computations. We
implement the system using the NVIDIA CUDA API and
compare its performance to the state-of-the-art FFT
libraries. On a range of NVIDIA GPUs and input sizes,
our auto-tuned FFTs outperform the NVIDIA CUFFT 3.0
library by up to 38x and deliver up to 3x higher
performance compared to a manually-tuned FFT.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
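The measure-and-select core of such an auto-tuner can be sketched
generically (a timing-only illustration with invented names; the paper's
tuner additionally prunes the search space with heuristics and composes
per-component kernel variants):

    #include <chrono>
    #include <cstddef>
    #include <functional>
    #include <limits>
    #include <vector>

    using Kernel = std::function<void()>;

    // Run each candidate once and return the index of the fastest. A real
    // tuner would profile only a pruned subset and average repeated runs.
    std::size_t autotune(const std::vector<Kernel>& candidates) {
        std::size_t best = 0;
        double best_ms = std::numeric_limits<double>::infinity();
        for (std::size_t i = 0; i < candidates.size(); ++i) {
            auto t0 = std::chrono::steady_clock::now();
            candidates[i]();  // on a GPU, synchronize before reading the clock
            auto t1 = std::chrono::steady_clock::now();
            double ms =
                std::chrono::duration<double, std::milli>(t1 - t0).count();
            if (ms < best_ms) { best_ms = ms; best = i; }
        }
        return best;
    }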
@Article{Hong:2011:ACG,
author = "Sungpack Hong and Sang Kyun Kim and Tayo Oguntebi and
Kunle Olukotun",
title = "Accelerating {CUDA} graph algorithms at maximum warp",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "267--276",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941590",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Graphs are powerful data representations favored in
many computational domains. Modern GPUs have recently
shown promising results in accelerating computationally
challenging graph problems, but their performance
suffers heavily when the graph structure is highly
irregular, as most real-world graphs tend to be. In
this study, we first observe that the poor performance
is caused by work imbalance and is an artifact of a
discrepancy between the GPU programming model and the
underlying GPU architecture. We then propose a novel
virtual warp-centric programming method that exposes
the traits of underlying GPU architectures to users.
Our method significantly improves the performance of
applications with heavily imbalanced workloads, and
enables trade-offs between workload imbalance and ALU
underutilization for fine-tuning the performance. Our
evaluation reveals that our method exhibits up to 9x
speedup over previous GPU algorithms and 12x over
single thread CPU execution on irregular graphs. When
properly configured, it also yields up to 30\%
improvement over previous GPU algorithms on regular
graphs. In addition to performance gains on graph
algorithms, our programming method achieves 1.3x to
15.1x speedup on a set of GPU benchmark applications.
Our study also confirms that the performance gap
between GPUs and other multi-threaded CPU graph
implementations is primarily due to the large
difference in memory bandwidth.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
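The indexing trick at the heart of the virtual warp-centric method can be
shown on the CPU (a sketch with an invented kernel shape; the real CUDA
kernels add atomic frontier updates): a physical warp of 32 lanes is split
into 32/K virtual warps of K lanes, each owning one vertex, so K tunes the
balance between ALU utilization and work imbalance.

    #include <vector>

    constexpr int kWarp = 32;  // physical warp width
    constexpr int K     = 4;   // virtual warp width (the tuning knob);
                               // each warp covers kWarp / K vertices

    // What one lane of one warp does during frontier expansion: the K
    // lanes of a virtual warp stride together over their vertex's edges.
    void expand(const std::vector<int>& row_ptr, const std::vector<int>& adj,
                std::vector<int>& frontier_out, int vertex_base, int lane) {
        int vwarp = lane / K;             // which virtual warp this lane is in
        int sub   = lane % K;             // lane index within the virtual warp
        int v     = vertex_base + vwarp;  // vertex owned by this virtual warp
        for (int e = row_ptr[v] + sub; e < row_ptr[v + 1]; e += K)
            frontier_out.push_back(adj[e]);  // a GPU kernel enqueues atomically
    }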
@Article{Kim:2011:ASC,
author = "Jungwon Kim and Honggyu Kim and Joo Hwan Lee and
Jaejin Lee",
title = "Achieving a single compute device image in {OpenCL}
for multiple {GPUs}",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "277--288",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941591",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "In this paper, we propose an OpenCL framework that
combines multiple GPUs and treats them as a single
compute device. Providing a single virtual compute
device image to the user makes an OpenCL application
written for a single GPU portable to the platform that
has multiple GPU devices. It also lets the application
exploit the full computing power of the multiple GPU
devices and the total amount of GPU memory available
in the platform. Our OpenCL framework automatically
distributes at run-time the OpenCL kernel written for a
single GPU into multiple CUDA kernels that execute on
the multiple GPU devices. It applies a run-time memory
access range analysis to the kernel by performing a
sampling run and identifies an optimal workload
distribution for the kernel. To achieve a single
compute device image, the runtime maintains virtual
device memory that is allocated in the main memory. The
OpenCL runtime treats the memory as if it were the
memory of a single GPU device and keeps it consistent
to the memories of the multiple GPU devices. Our
OpenCL-C-to-C translator generates the sampling code
from the OpenCL kernel code, and our OpenCL-C-to-CUDA-C
translator generates the CUDA kernel code for the
distributed OpenCL kernel. We show the effectiveness of
our OpenCL framework by implementing the OpenCL runtime
and two source-to-source translators. We evaluate its
performance with a system that contains 8 GPUs using 11
OpenCL benchmark applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
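The workload-distribution step can be pictured as cutting a kernel's
one-dimensional index space into contiguous per-device chunks (a hedged
sketch with invented names; the actual runtime derives chunk sizes and the
memory ranges each chunk touches from the sampling run rather than
splitting evenly):

    #include <cstddef>
    #include <vector>

    struct Chunk { std::size_t begin, end; };  // half-open NDRange sub-range

    // Split `global_size` work-items evenly over `num_devices` GPUs.
    std::vector<Chunk> split_range(std::size_t global_size,
                                   std::size_t num_devices) {
        std::vector<Chunk> chunks;
        std::size_t base = global_size / num_devices;
        std::size_t rem  = global_size % num_devices;
        std::size_t at   = 0;
        for (std::size_t d = 0; d < num_devices; ++d) {
            std::size_t len = base + (d < rem ? 1 : 0);  // spread remainder
            chunks.push_back({at, at + len});
            at += len;
        }
        return chunks;
    }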
@Article{Prabhakar:2011:QAS,
author = "Ramya Prabhakar and Shekhar Srikantaiah and Rajat Garg
and Mahmut Kandemir",
title = "{QoS} aware storage cache management in multi-server
environments",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "289--290",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941593",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "In this paper, we propose a novel two-step approach to
the management of the storage caches to provide
predictable performance in multi-server storage
architectures: (1) An adaptive QoS decomposition and
optimization step uses a max-flow algorithm to determine
the best decomposition of application-level QoS into
sub-QoSs such that the application performance is
optimized, and (2) A storage cache allocation step uses
feedback control theory to allocate shared storage
cache space such that the specified QoSs are satisfied
throughout the execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
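Step (2) amounts to one feedback loop per application; a toy proportional
controller conveys the flavor (function name, gain, and units are invented
for illustration; the paper derives its controller from control theory
rather than this ad hoc rule):

    // Nudge an application's storage-cache share toward its latency
    // target: grow the share when measured latency misses the (sub-)QoS
    // target, shrink it when the target is over-achieved.
    double adjust_share(double share, double latency_ms, double target_ms) {
        const double kGain = 0.05;                            // illustrative
        double error = (latency_ms - target_ms) / target_ms;  // relative error
        share += kGain * error;                 // more cache if too slow
        if (share < 0.0) share = 0.0;           // clamp to a valid share
        if (share > 1.0) share = 1.0;
        return share;
    }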
@Article{Roy:2011:WAU,
author = "Amitabha Roy and Steven Hand and Tim Harris",
title = "Weak atomicity under the x86 memory consistency
model",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "291--292",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941594",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We consider the problem of building a weakly atomic
Software Transactional Memory (STM) that provides
Single (Global) Lock Atomicity (SLA) while adhering to
the x86 memory consistency model (x86-MM).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jeon:2011:KLG,
author = "Donghwan Jeon and Saturnino Garcia and Chris Louie and
Sravanthi Kota Venkata and Michael Bedford Taylor",
title = "{Kremlin}: like {\tt gprof}, but for parallelization",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "293--294",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941595",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "This paper overviews Kremlin, a software profiling
tool designed to assist the parallelization of serial
programs. Kremlin accepts serial source code,
profiles it, and provides a list of regions that should
be considered in parallelization. Unlike a typical
profiler, Kremlin profiles not only work but also
parallelism, which is accomplished via a novel
technique called hierarchical critical path analysis.
Our evaluation demonstrates that Kremlin is highly
effective, resulting in parallelized programs whose
performance sometimes exceeds, and is generally
comparable to, that of manual parallelization. At the
same time, Kremlin requires the user to parallelize
significantly fewer regions of the program. Finally, a
user study suggests Kremlin is effective in improving
the productivity of programmers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
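The metric underneath critical path analysis is a region's total work
divided by the length of its longest dependence chain. A small offline
sketch (invented data structures; Kremlin computes this hierarchically and
online while the instrumented serial program runs):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct Node {
        long cost;              // cost of one dynamic instruction
        std::vector<int> deps;  // indices of instructions it depends on
    };

    // Nodes are assumed to be listed in execution (topological) order.
    // Returns work / critical-path length, i.e. the available parallelism.
    double parallelism(const std::vector<Node>& g) {
        long work = 0, cp = 0;
        std::vector<long> finish(g.size(), 0);  // longest chain ending here
        for (std::size_t i = 0; i < g.size(); ++i) {
            long start = 0;
            for (int d : g[i].deps) start = std::max(start, finish[d]);
            finish[i] = start + g[i].cost;
            work += g[i].cost;
            cp = std::max(cp, finish[i]);
        }
        return cp ? static_cast<double>(work) / cp : 0.0;
    }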
@Article{Strzodka:2011:TSM,
author = "Robert Strzodka and Mohammed Shaheen and Dawid Pajak",
title = "Time skewing made simple",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "295--296",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941596",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Time skewing and loop tiling has been known for a long
time to be a highly beneficial acceleration technique
for nested loops especially on bandwidth hungry
multi-core processors, but it is little used in
practice because efficient implementations utilize
complicated code and simple or abstract ones show much
smaller gains over naive nested loops. We break this
dilemma with an essential time skewing scheme that is
both compact and fast.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
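A compact time-skewed loop nest of the kind the abstract alludes to, for
an in-place Gauss-Seidel-style 1-D sweep (an illustration of classic time
skewing under that in-place assumption, not the paper's scheme; B is the
tile width):

    #include <algorithm>
    #include <vector>

    // Perform T sweeps of a[i] = 0.5 * (a[i-1] + a[i+1]) over
    // parallelogram tiles of the (time, space) plane: each tile keeps
    // about B points in cache across all T sweeps instead of streaming
    // the whole array once per sweep.
    void skewed_sweeps(std::vector<double>& a, int T, int B) {
        int n = static_cast<int>(a.size());
        for (int ii = 1; ii < n - 1 + T; ii += B)       // skewed tile origin
            for (int t = 0; t < T; ++t) {
                int lo = std::max(1, ii - t);           // tile shifts left by t
                int hi = std::min(n - 1, ii + B - t);
                for (int i = lo; i < hi; ++i)
                    a[i] = 0.5 * (a[i - 1] + a[i + 1]);
            }
    }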
@Article{Grosset:2011:EGC,
author = "Andre Vincent Pascal Grosset and Peihong Zhu and
Shusen Liu and Suresh Venkatasubramanian and Mary
Hall",
title = "Evaluating graph coloring on {GPUs}",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "297--298",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941597",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "This paper evaluates features of graph coloring
algorithms implemented on graphics processing units
(GPUs), comparing coloring heuristics and thread
decompositions. As compared to prior work on graph
coloring for other parallel architectures, we find that
the large number of cores and relatively high global
memory bandwidth of a GPU lead to different strategies
for the parallel implementation. Specifically, we find
that a simple uniform block partitioning is very
effective on GPUs, and our parallel coloring heuristics
lead to the same or fewer colors than prior approaches
for distributed-memory cluster architectures. Our
algorithm resolves many coloring conflicts across
partitioned blocks on the GPU by iterating through the
coloring process, before returning to the CPU to
resolve remaining conflicts. With this approach we use
as few colors as (if not fewer than) the best
sequential graph coloring algorithm, and our
performance is close to that of the fastest sequential
graph coloring algorithms, which have poor color
quality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
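The iterate-until-conflict-free structure reads roughly as below in
sequential C++ (a sketch: on the GPU every block colors its partition's
worklist in parallel, so genuine conflicts arise at partition boundaries,
and a final CPU pass mops up the stragglers):

    #include <vector>

    // Greedy first-fit coloring with iterative conflict resolution.
    // `adj` is an adjacency list; on return color[v] is a proper color.
    void color_iteratively(const std::vector<std::vector<int>>& adj,
                           std::vector<int>& color) {
        int n = static_cast<int>(adj.size());
        color.assign(n, -1);
        std::vector<int> worklist(n);
        for (int v = 0; v < n; ++v) worklist[v] = v;
        while (!worklist.empty()) {
            for (int v : worklist) {            // coloring pass (parallel on GPU)
                std::vector<bool> used(n, false);
                for (int u : adj[v])
                    if (color[u] >= 0) used[color[u]] = true;
                int c = 0;
                while (used[c]) ++c;            // smallest color unused nearby
                color[v] = c;
            }
            std::vector<int> next;              // conflict-detection pass
            for (int v : worklist)
                for (int u : adj[v])
                    if (u < v && color[u] == color[v]) {
                        next.push_back(v);      // loser retries next round
                        break;
                    }
            worklist.swap(next);
        }
    }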
@Article{Ding:2011:TEP,
author = "Chen Ding",
title = "Two examples of parallel programming without
concurrency constructs {(PP-CC)}",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "299--300",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941598",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Stellwag:2011:WFN,
author = "Philippe Stellwag and Fabian Scheler and Jakob Krainz
and Wolfgang Schr{\"o}der-Preikschat",
title = "A wait-free {NCAS} library for parallel applications
with timing constraints",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "301--302",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941599",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We introduce our major ideas of a wait-free,
linearizable, and disjoint access parallel NCAS
library, called rtNCAS. It focuses the construction of
wait-free data structure operations (DSO) in real-time
circumstances. rtNCAS is able to conditionally swap
multiple independent words (NCAS) in an atomic manner.
It allows us, furthermore, to implement arbitrary DSO
by means of their sequential specification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
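The sequential specification that rtNCAS realizes wait-free is easy to
state; the reference version below simply serializes through a mutex
(illustration only, with invented names; the point of the paper is
obtaining the same atomic effect without blocking):

    #include <mutex>
    #include <vector>

    struct Update { long* addr; long expected; long desired; };

    std::mutex g_spec_lock;  // stands in for the library's atomicity

    // NCAS: if every word still holds its expected value, swap them all
    // as one atomic step and report success; otherwise change nothing.
    bool ncas(const std::vector<Update>& ops) {
        std::lock_guard<std::mutex> g(g_spec_lock);
        for (const Update& u : ops)
            if (*u.addr != u.expected) return false;  // any mismatch fails
        for (const Update& u : ops)
            *u.addr = u.desired;
        return true;
    }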
@Article{Davies:2011:ABR,
author = "Teresa Davies and Zizhong Chen and Christer Karlsson
and Hui Liu",
title = "Algorithm-based recovery for {HPL}",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "303--304",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941600",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "When more processors are used for a calculation, the
probability that one will fail during the calculation
increases. Fault tolerance is a technique for allowing
a calculation to survive a failure, and includes
recovering lost data. A common method of recovery is
diskless checkpointing. However, it has high overhead
when a large amount of data is involved, as is the case
with matrix operations. A checksum-based method allows
fault tolerance of matrix operations with lower
overhead. This technique is applicable to the LU
decomposition in the benchmark HPL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
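The checksum idea can be demonstrated on a dense matrix in a few lines (a
toy sketch of the encode/recover invariant; the paper's contribution is
maintaining such checksums through the row operations of LU in HPL so that
recovery stays valid mid-factorization):

    #include <cstddef>
    #include <vector>

    using Matrix = std::vector<std::vector<double>>;

    // Append a checksum row holding the sum of every column.
    void encode(Matrix& a) {
        std::size_t m = a.size(), n = a[0].size();
        std::vector<double> sums(n, 0.0);
        for (std::size_t i = 0; i < m; ++i)
            for (std::size_t j = 0; j < n; ++j) sums[j] += a[i][j];
        a.push_back(sums);
    }

    // Rebuild a lost row r from the checksum row (the last row) and the
    // surviving rows: lost = checksum - sum of the others, per column.
    void recover(Matrix& a, std::size_t r) {
        std::size_t m = a.size() - 1, n = a[0].size();
        for (std::size_t j = 0; j < n; ++j) {
            double v = a[m][j];
            for (std::size_t i = 0; i < m; ++i)
                if (i != r) v -= a[i][j];
            a[r][j] = v;
        }
    }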
@Article{Willcock:2011:APP,
author = "Jeremiah James Willcock and Torsten Hoefler and
Nicholas Gerard Edmonds and Andrew Lumsdaine",
title = "{Active Pebbles}: a programming model for highly
parallel fine-grained data-driven computations",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "305--306",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941601",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "A variety of programming models exist to support
large-scale, distributed memory, parallel computation.
These programming models have historically targeted
coarse-grained applications with natural locality such
as those found in a variety of scientific simulations
of the physical world. Fine-grained, irregular, and
unstructured applications such as those found in
biology, social network analysis, and graph theory are
less well supported. We propose Active Pebbles, a
programming model which allows these applications to be
expressed naturally; an accompanying execution model
ensures performance and scalability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Fischer:2011:SMC,
author = "Topher Fischer and Eric Mercer and Neha Rungta",
title = "Symbolically modeling concurrent {MCAPI} executions",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "307--308",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941602",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Improper use of Inter-Process Communication (IPC)
within concurrent systems often creates data races
which can lead to bugs that are challenging to
discover. Techniques that use Satisfiability Modulo
Theories (SMT) problems to symbolically model possible
executions of concurrent software have recently been
proposed for use in the formal verification of
software. In this work we describe a new technique for
modeling executions of concurrent software that uses a
message-passing API called MCAPI. Our technique uses an
execution trace to create an SMT problem that
symbolically models all possible concurrent executions
and follows the same sequence of conditional branch
outcomes as the provided execution trace. We check if
there exists a satisfying assignment to the SMT problem
with respect to specific safety properties. If such an
assignment exists, it provides the conditions that lead
to the violation of the property. We show how our
method models behaviors of MCAPI applications that are
ignored in previously published techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Siegel:2011:AFV,
author = "Stephen F. Siegel and Timothy K. Zirkel",
title = "Automatic formal verification of {MPI}-based parallel
programs",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "309--310",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941603",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "The Toolkit for Accurate Scientific Software (TASS) is
a suite of tools for the formal verification of
MPI-based parallel programs used in computational
science. TASS can verify various safety properties as
well as compare two programs for functional
equivalence. The TASS front end takes an integer $ n
\geq 1 $ and a C/MPI program, and constructs an
abstract model of the program with $n$ processes.
Procedures, structs, (multi-dimensional) arrays,
heap-allocated data, pointers, and pointer arithmetic
are all representable in a TASS model. The model is
then explored using symbolic execution and explicit
state space enumeration. A number of techniques are
used to reduce the time and memory consumed. A variety
of realistic MPI programs have been verified with TASS,
including Jacobi iteration and manager-worker type
programs, and some subtle defects have been discovered.
TASS is written in Java and is available from
\path=http://vsl.cis.udel.edu/tass= under the GNU
Public License.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Donaldson:2011:STA,
author = "Alastair F. Donaldson and Daniel Kroening and Philipp
Ruemmer",
title = "{SCRATCH}: a tool for automatic analysis of {DMA}
races",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "311--312",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941604",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We present the SCRATCH tool, which uses bounded model
checking and k-induction to automatically analyse
software for multicore processors such as the Cell BE,
in order to detect DMA races.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Botincan:2011:ASP,
author = "Matko Botincan and Mike Dodds and Alastair F.
Donaldson and Matthew J. Parkinson",
title = "Automatic safety proofs for asynchronous memory
operations",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "313--314",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941605",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We present a work-in-progress proof system and tool,
based on separation logic, for analysing memory safety
of multicore programs that use asynchronous memory
operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Filinski:2011:TCT,
author = "Andrzej Filinski",
title = "Towards a comprehensive theory of monadic effects",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "1--1",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034775",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gibbons:2011:JDI,
author = "Jeremy Gibbons and Ralf Hinze",
title = "Just do it: simple monadic equational reasoning",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "2--14",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034777",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Swamy:2011:LMP,
author = "Nikhil Swamy and Nataliya Guts and Daan Leijen and
Michael Hicks",
title = "Lightweight monadic programming in {ML}",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "15--27",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034778",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mitchell:2011:FPT,
author = "Emily G. Mitchell",
title = "Functional programming through deep time: modeling the
first complex ecosystems on {Earth}",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "28--31",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034779",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Schrijvers:2011:MZV,
author = "Tom Schrijvers and Bruno C. d. S. Oliveira",
title = "Monads, zippers and views: virtualizing the monad
stack",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "32--44",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034781",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Krishnaswami:2011:SMG,
author = "Neelakantan R. Krishnaswami and Nick Benton",
title = "A semantic model for graphical user interfaces",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "45--57",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034782",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shivers:2011:MRT,
author = "Olin Shivers and Aaron J. Turon",
title = "Modular rollback through control logging: a pair of
twin functional pearls",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "58--68",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034783",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Vardoulakis:2011:PFA,
author = "Dimitrios Vardoulakis and Olin Shivers",
title = "Pushdown flow analysis of first-class control",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "69--80",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034785",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Materzok:2011:SDC,
author = "Marek Materzok and Dariusz Biernacki",
title = "Subtyping delimited continuations",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "81--93",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034786",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Castagna:2011:STF,
author = "Giuseppe Castagna and Zhiwu Xu",
title = "Set-theoretic foundation of parametric polymorphism
and subtyping",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "94--106",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034788",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gesbert:2011:PPS,
author = "Nils Gesbert and Pierre Genev{\`e}s and Nabil
Laya{\"\i}da",
title = "Parametric polymorphism and semantic subtyping: the
logical connection",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "107--116",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034789",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Morihata:2011:BTI,
author = "Akimasa Morihata and Kiminori Matsuzaki",
title = "Balanced trees inhabiting functional parallel
programming",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "117--128",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034791",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chen:2011:ISA,
author = "Yan Chen and Joshua Dunfield and Matthew A. Hammer and
Umut A. Acar",
title = "Implicit self-adjusting computation for purely
functional programs",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "129--141",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034792",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Takeyama:2011:PAC,
author = "Makoto Takeyama",
title = "Programming assurance cases in {Agda}",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "142--142",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034794",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Devriese:2011:BST,
author = "Dominique Devriese and Frank Piessens",
title = "On the bright side of type classes: instance arguments
in {Agda}",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "143--155",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034796",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Magalhaes:2011:FMM,
author = "Jos{\'e} Pedro Magalh{\~a}es and W. Bas de Haas",
title = "Functional modelling of musical harmony: an experience
report",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "156--162",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034797",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gonthier:2011:HMA,
author = "Georges Gonthier and Beta Ziliani and Aleksandar
Nanevski and Derek Dreyer",
title = "How to make ad hoc proof automation less ad hoc",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "163--175",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034798",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Disney:2011:THO,
author = "Tim Disney and Cormac Flanagan and Jay McCarthy",
title = "Temporal higher-order contracts",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "176--188",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034800",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Might:2011:PDF,
author = "Matthew Might and David Darais and Daniel Spiewak",
title = "Parsing with derivatives: a functional pearl",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "189--195",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034801",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ueno:2011:ENM,
author = "Katsuhiro Ueno and Atsushi Ohori and Toshiaki Otomo",
title = "An efficient non-moving garbage collector for
functional languages",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "196--208",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034802",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gill:2011:DEF,
author = "Andy Gill and Andrew Farmer",
title = "Deriving an efficient {FPGA} implementation of a low
density parity check forward error corrector",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "209--220",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034804",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ghica:2011:GSIb,
author = "Dan R. Ghica and Alex Smith and Satnam Singh",
title = "Geometry of synthesis {IV}: compiling affine recursion
into static hardware",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "221--233",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034805",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ahn:2011:HMS,
author = "Ki Yung Ahn and Tim Sheard",
title = "A hierarchy of mendler style recursion combinators:
taming inductive datatypes with negative occurrences",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "234--246",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034807",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jay:2011:TSI,
author = "Barry Jay and Jens Palsberg",
title = "Typed self-interpretation by pattern matching",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "247--258",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034808",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chyzak:2011:UCP,
author = "Fr{\'e}d{\'e}ric Chyzak and Alexis Darrasse",
title = "Using {{\tt camlp4}} for presenting dynamic
mathematics on the {Web}: {DynaMoW}, an {OCaml}
language extension for the run-time generation of
mathematical contents and their presentation on the
{Web}",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "259--265",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034809",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Swamy:2011:SDP,
author = "Nikhil Swamy and Juan Chen and C{\'e}dric Fournet and
Pierre-Yves Strub and Karthikeyan Bhargavan and Jean
Yang",
title = "Secure distributed programming with value-dependent
types",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "266--278",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034811",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Foster:2011:FNP,
author = "Nate Foster and Rob Harrison and Michael J. Freedman
and Christopher Monsanto and Jennifer Rexford and Alec
Story and David Walker",
title = "{Frenetic}: a network programming language",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "279--291",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034812",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Fisher:2011:FLT,
author = "Kathleen Fisher and Nate Foster and David Walker and
Kenny Q. Zhu",
title = "{Forest}: a language and toolkit for programming with
filestores",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "292--306",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034814",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ohori:2011:MSM,
author = "Atsushi Ohori and Katsuhiro Ueno",
title = "Making {Standard ML} a practical database programming
language",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "307--319",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034815",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pouillard:2011:NP,
author = "Nicolas Pouillard",
title = "Nameless, painless",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "320--332",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034817",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Weirich:2011:BU,
author = "Stephanie Weirich and Brent A. Yorgey and Tim Sheard",
title = "Binders unbound",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "333--345",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034818",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Popescu:2011:RPS,
author = "Andrei Popescu and Elsa L. Gunter",
title = "Recursion principles for syntax with bindings and
substitution",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "346--358",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034819",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hinze:2011:PUF,
author = "Ralf Hinze and Daniel W. H. James",
title = "Proving the unique fixed-point principle correct: an
adventure with category theory",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "359--371",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034821",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gaboardi:2011:LPS,
author = "Marco Gaboardi and Luca Paolini and Mauro Piccolo",
title = "Linearity and {PCF}: a semantic insight!",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "372--384",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034822",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mu:2011:GDT,
author = "Shin-Cheng Mu and Akimasa Morihata",
title = "Generalising and dualising the third list-homomorphism
theorem: functional pearl",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "385--391",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034824",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wang:2011:IUE,
author = "Meng Wang and Jeremy Gibbons and Nicolas Wu",
title = "Incremental updates for efficient bidirectional
transformations",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "392--403",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034825",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gotsman:2011:MVP,
author = "Alexey Gotsman and Hongseok Yang",
title = "Modular verification of preemptive {OS} kernels",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "404--417",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034827",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chargueraud:2011:CFV,
author = "Arthur Chargu{\'e}raud",
title = "Characteristic formulae for the verification of
imperative programs",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "418--430",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034828",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ahmed:2011:EPC,
author = "Amal Ahmed and Matthias Blume",
title = "An equivalence-preserving {CPS} translation via
multi-language semantics",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "431--444",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034830",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Thamsborg:2011:KLR,
author = "Jacob Thamsborg and Lars Birkedal",
title = "A {Kripke} logical relation for effect-based program
transformations",
journal = j-SIGPLAN,
volume = "46",
number = "9",
pages = "445--456",
month = sep,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2034574.2034831",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Oct 22 08:31:30 MDT 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ICFP '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sutherland:2011:SP,
author = "Ivan Sutherland",
title = "The sequential prison",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "1--2",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048068",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liu:2011:SPD,
author = "Tongping Liu and Emery D. Berger",
title = "{SHERIFF}: precise detection and automatic mitigation
of false sharing",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "3--18",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048070",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Weeratunge:2011:APA,
author = "Dasarath Weeratunge and Xiangyu Zhang and Suresh
Jaganathan",
title = "Accentuating the positive: atomicity inference and
enforcement using correct executions",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "19--34",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048071",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Li:2011:SST,
author = "Du Li and Witawas Srisa-an and Matthew B. Dwyer",
title = "{SOS}: saving time in dynamic race detection with
stationary analysis",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "35--50",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048072",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shacham:2011:TAC,
author = "Ohad Shacham and Nathan Bronson and Alex Aiken and
Mooly Sagiv and Martin Vechev and Eran Yahav",
title = "Testing atomicity of composed concurrent operations",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "51--64",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048073",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yessenov:2011:DDS,
author = "Kuat Yessenov and Zhilei Xu and Armando Solar-Lezama",
title = "Data-driven synthesis for object-oriented frameworks",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "65--82",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048075",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pu:2011:SFO,
author = "Yewen Pu and Rastislav Bodik and Saurabh Srivastava",
title = "Synthesis of first-order dynamic programming
algorithms",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "83--98",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048076",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Doherty:2011:KAM,
author = "Jesse Doherty and Laurie Hendren and Soroush Radpour",
title = "Kind analysis for {MATLAB}",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "99--118",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048077",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Feldthaus:2011:TSR,
author = "Asger Feldthaus and Todd Millstein and Anders
M{\o}ller and Max Sch{\"a}fer and Frank Tip",
title = "Tool-supported refactoring for {JavaScript}",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "119--138",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048078",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kats:2011:ILD,
author = "Lennart C. L. Kats and Rob Vermaas and Eelco Visser",
title = "Integrated language definition testing: enabling
test-driven language development",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "139--154",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048080",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jovic:2011:CMI,
author = "Milan Jovic and Andrea Adamoli and Matthias
Hauswirth",
title = "Catch me if you can: performance bug detection in the
wild",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "155--170",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048081",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Joshi:2011:PPT,
author = "Pallavi Joshi and Haryadi S. Gunawi and Koushik Sen",
title = "{PREFAIL}: a programmable tool for multiple-failure
injection",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "171--188",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048082",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Thummalapenta:2011:SMS,
author = "Suresh Thummalapenta and Tao Xie and Nikolai Tillmann
and Jonathan de Halleux and Zhendong Su",
title = "Synthesizing method sequences for high-coverage
testing",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "189--206",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048083",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tripp:2011:HED,
author = "Omer Tripp and Greta Yorsh and John Field and Mooly
Sagiv",
title = "{HAWKEYE}: effective discovery of dataflow impediments
to parallelization",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "207--224",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048085",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Golan-Gueta:2011:AFG,
author = "Guy Golan-Gueta and Nathan Bronson and Alex Aiken and
G. Ramalingam and Mooly Sagiv and Eran Yahav",
title = "Automatic fine-grain locking using shape properties",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "225--242",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048086",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ke:2011:SPP,
author = "Chuanle Ke and Lei Liu and Chao Zhang and Tongxin Bai
and Bryan Jacobs and Chen Ding",
title = "Safe parallel programming using dynamic dependence
hints",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "243--258",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048087",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Raman:2011:SSP,
author = "Arun Raman and Greta Yorsh and Martin Vechev and Eran
Yahav",
title = "{Sprint}: speculative prefetching of remote data",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "259--274",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048088",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Aftandilian:2011:AA,
author = "Edward E. Aftandilian and Samuel Z. Guyer and Martin
Vechev and Eran Yahav",
title = "Asynchronous assertions",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "275--288",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048090",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hoffman:2011:RPS,
author = "Kevin J. Hoffman and Harrison Metzger and Patrick
Eugster",
title = "{Ribbons}: a partially shared memory programming
model",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "289--306",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048091",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yang:2011:WNM,
author = "Xi Yang and Stephen M. Blackburn and Daniel Frampton
and Jennifer B. Sartor and Kathryn S. McKinley",
title = "Why nothing matters: the impact of zeroing",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "307--324",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048092",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Darulova:2011:TNC,
author = "Eva Darulova and Viktor Kuncak",
title = "Trustworthy numerical computation in {Scala}",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "325--344",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048094",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Li:2011:JEC,
author = "Siliang Li and Gang Tan",
title = "{JET}: exception checking in the {Java Native
Interface}",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "345--358",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048095",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{David:2011:ISM,
author = "Cristina David and Wei-Ngan Chin",
title = "Immutable specifications for more concise and precise
verification",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "359--374",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048096",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shali:2011:HPE,
author = "Amin Shali and William R. Cook",
title = "Hybrid partial evaluation",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "375--390",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048098",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Erdweg:2011:SLB,
author = "Sebastian Erdweg and Tillmann Rendel and Christian
K{\"a}stner and Klaus Ostermann",
title = "{SugarJ}: library-based syntactic language
extensibility",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "391--406",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048099",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Demetrescu:2011:RIP,
author = "Camil Demetrescu and Irene Finocchi and Andrea
Ribichini",
title = "Reactive imperative programming with dataflow
constraints",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "407--426",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048100",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Burckhardt:2011:TPO,
author = "Sebastian Burckhardt and Daan Leijen and Caitlin
Sadowski and Jaeheon Yi and Thomas Ball",
title = "Two for the price of one: a model for parallel and
incremental computation",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "427--444",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048101",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tian:2011:STT,
author = "Kai Tian and Eddy Zhang and Xipeng Shen",
title = "A step towards transparent integration of
input-consciousness into dynamic program
optimizations",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "445--462",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048103",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jo:2011:ELR,
author = "Youngjoon Jo and Milind Kulkarni",
title = "Enhancing locality for recursive traversals of
recursive structures",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "463--482",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048104",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Adams:2011:FST,
author = "Michael D. Adams and Andrew W. Keep and Jan Midtgaard
and Matthew Might and Arun Chauhan and R. Kent Dybvig",
title = "Flow-sensitive type recovery in linear-log time",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "483--498",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048105",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Acar:2011:OSC,
author = "Umut A. Acar and Arthur Chargu{\'e}raud and Mike
Rainey",
title = "Oracle scheduling: controlling granularity in
implicitly parallel languages",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "499--518",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048106",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jeon:2011:KPS,
author = "Donghwan Jeon and Saturnino Garcia and Chris Louie and
Michael Bedford Taylor",
title = "{Kismet}: parallel speedup estimates for serial
programs",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "519--536",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048108",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cledat:2011:ESS,
author = "Romain E. Cledat and Tushar Kumar and Santosh Pande",
title = "Efficiently speeding up sequential computation through
the n-way programming model",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "537--554",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048109",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pyla:2011:ECG,
author = "Hari K. Pyla and Calvin Ribbens and Srinidhi
Varadarajan",
title = "Exploiting coarse-grain speculative parallelism",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "555--574",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048110",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Turon:2011:SJP,
author = "Aaron J. Turon and Claudio V. Russo",
title = "Scalable join patterns",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "575--594",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048111",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Delaware:2011:PLT,
author = "Benjamin Delaware and William Cook and Don Batory",
title = "Product lines of theorems",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "595--608",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048113",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ina:2011:GTG,
author = "Lintaro Ina and Atsushi Igarashi",
title = "Gradual typing for generics",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "609--624",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048114",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tov:2011:TST,
author = "Jesse A. Tov and Riccardo Pucella",
title = "A theory of substructural types and control",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "625--642",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048115",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Buse:2011:BBU,
author = "Raymond P. L. Buse and Caitlin Sadowski and Westley
Weimer",
title = "Benefits and barriers of user evaluation in software
engineering research",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "643--656",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048117",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sewe:2011:CCS,
author = "Andreas Sewe and Mira Mezini and Aibek Sarimbekov and
Walter Binder",
title = "Da capo con {Scala}: design and analysis of a {Scala}
benchmark suite for the {Java Virtual Machine}",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "657--676",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048118",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Richards:2011:ACJ,
author = "Gregor Richards and Andreas Gal and Brendan Eich and
Jan Vitek",
title = "Automated construction of {JavaScript} benchmarks",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "677--694",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048119",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hemel:2011:DPM,
author = "Zef Hemel and Eelco Visser",
title = "Declaratively programming the {Mobile Web} with
{Mobl}",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "695--712",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048121",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sunshine:2011:FCS,
author = "Joshua Sunshine and Karl Naden and Sven Stork and
Jonathan Aldrich and {\'E}ric Tanter",
title = "First-class state change in {Plaid}",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "713--732",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048122",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lorenz:2011:CLL,
author = "David H. Lorenz and Boaz Rosenan",
title = "{Cedalion}: a language for language oriented
programming",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "733--752",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048123",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hammer:2011:SAS,
author = "Matthew A. Hammer and Georg Neis and Yan Chen and Umut
A. Acar",
title = "Self-adjusting stack machines",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "753--772",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048124",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kulkarni:2011:JCP,
author = "Prasad A. Kulkarni",
title = "{JIT} compilation policy for modern machines",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "773--788",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048126",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wu:2011:RTS,
author = "Peng Wu and Hiroshige Hayashizaki and Hiroshi Inoue
and Toshio Nakatani",
title = "Reducing trace selection footprint for large-scale
{Java} applications without performance loss",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "789--804",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048127",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kastner:2011:VAP,
author = "Christian K{\"a}stner and Paolo G. Giarrusso and
Tillmann Rendel and Sebastian Erdweg and Klaus
Ostermann and Thorsten Berger",
title = "Variability-aware parsing in the presence of lexical
macros and conditional compilation",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "805--824",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048128",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wurthinger:2011:SAR,
author = "Thomas W{\"u}rthinger and Danilo Ansaloni and Walter
Binder and Christian Wimmer and Hanspeter
M{\"o}ssenb{\"o}ck",
title = "Safe and atomic run-time code evolution for {Java} and
its application to dynamic {AOP}",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "825--844",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048129",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pinto:2011:SAC,
author = "Pedro da Rocha Pinto and Thomas Dinsdale-Young and
Mike Dodds and Philippa Gardner and Mark Wheelhouse",
title = "A simple abstraction for complex concurrent indexes",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "845--864",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048131",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Anderson:2011:CNP,
author = "Zachary Anderson and David Gay",
title = "Composable, nestable, pessimistic atomic statements",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "865--884",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048132",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lublinerman:2011:DI,
author = "Roberto Lublinerman and Jisheng Zhao and Zoran
Budimli{\'c} and Swarat Chaudhuri and Vivek Sarkar",
title = "Delegated isolation",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "885--902",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048133",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Harris:2011:ACA,
author = "Tim Harris and Martin Abadi and Rebecca Isaacs and
Ross McIlroy",
title = "{AC}: composable asynchronous {IO} for native
languages",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "903--920",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048134",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Austin:2011:VVL,
author = "Thomas H. Austin and Tim Disney and Cormac Flanagan",
title = "Virtual values for language extension",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "921--938",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048136",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Palmer:2011:BJM,
author = "Zachary Palmer and Scott F. Smith",
title = "Backstage {Java}: making a difference in
metaprogramming",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "939--958",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048137",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Verwaest:2011:FOL,
author = "Toon Verwaest and Camillo Bruni and Mircea Lungu and
Oscar Nierstrasz",
title = "Flexible object layouts: enabling lightweight language
extensions by intercepting slot access",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "959--972",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048138",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Allen:2011:TCM,
author = "Eric Allen and Justin Hilburn and Scott Kilpatrick and
Victor Luchangco and Sukyoung Ryu and David Chase and
Guy Steele",
title = "Type checking modular multiple dispatch with
parametric polymorphism and multiple inheritance",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "973--992",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048140",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Im:2011:STS,
author = "Hyeonseung Im and Keiko Nakata and Jacques Garrigue
and Sungwoo Park",
title = "A syntactic type system for recursive modules",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "993--1012",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048141",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Summers:2011:FBC,
  author =       "Alexander J. Summers and Peter M{\"u}ller",
title = "Freedom before commitment: a lightweight type system
for object initialisation",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "1013--1032",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048142",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Madhavan:2011:NDV,
author = "Ravichandhran Madhavan and Raghavan Komondoor",
title = "Null dereference verification via over-approximated
weakest pre-conditions analysis",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "1033--1052",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048144",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sridharan:2011:FTA,
author = "Manu Sridharan and Shay Artzi and Marco Pistoia and
Salvatore Guarnieri and Omer Tripp and Ryan Berg",
title = "{F4F}: taint analysis of framework-based {Web}
applications",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "1053--1068",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048145",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Son:2011:RFM,
author = "Sooel Son and Kathryn S. McKinley and Vitaly
Shmatikov",
title = "{RoleCast}: finding missing security checks when you
do not know what checks are",
journal = j-SIGPLAN,
volume = "46",
number = "10",
pages = "1069--1084",
month = oct,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076021.2048146",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:53 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '11 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Veldema:2011:IDP,
  author =       "Ronald Veldema and Michael Philippsen",
title = "Iterative data-parallel mark\&sweep on a {GPU}",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "1--10",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993480",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "Automatic memory management makes programming easier.
This is also true for general purpose GPU computing
where currently no garbage collectors exist. In this
paper we present a parallel mark-and-sweep collector to
collect GPU memory on the GPU and tune its performance.
Performance is increased by: (1) data-parallel marking
and sweeping of regions of memory, (2) marking all
elements of large arrays in parallel, (3) trading
recursion over parallelism to match deeply linked data
structures. (1) is achieved by coarsely processing all
potential objects in a region of memory in parallel.
When during (1) a large array is detected, it is put
aside and a parallel-for is later issued on the GPU to
mark its elements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Majo:2011:MMN,
author = "Zoltan Majo and Thomas R. Gross",
title = "Memory management in {NUMA} multicore systems: trapped
between cache contention and interconnect overhead",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "11--20",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993481",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "Multiprocessors based on processors with multiple
cores usually include a non-uniform memory architecture
(NUMA); even current 2-processor systems with 8 cores
exhibit non-uniform memory access times. As the cores
of a processor share a common cache, the issues of
memory management and process mapping must be
revisited. We find that optimizing only for data
locality can counteract the benefits of cache
contention avoidance and vice versa. Therefore, system
software must take both data locality and cache
contention into account to achieve good performance,
and memory management cannot be decoupled from process
scheduling. We present a detailed analysis of a
commercially available NUMA-multicore architecture, the
Intel Nehalem.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Marlow:2011:MGC,
author = "Simon Marlow and Simon Peyton Jones",
title = "Multicore garbage collection with local heaps",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "21--32",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993482",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "In a parallel, shared-memory, language with a garbage
collected heap, it is desirable for each processor to
perform minor garbage collections independently.
Although obvious, it is difficult to make this idea pay
off in practice, especially in languages where mutation
is common. We present several techniques that
substantially improve the state of the art. We describe
these techniques in the context of a full-scale
implementation of Haskell, and demonstrate that our
local-heap collector substantially improves scaling,
peak performance, and robustness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Garner:2011:CEO,
author = "Robin J. Garner and Stephen M. Blackburn and Daniel
Frampton",
title = "A comprehensive evaluation of object scanning
techniques",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "33--42",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993484",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "At the heart of all garbage collectors lies the
process of identifying and processing reference fields
within an object. Despite its key role, and evidence of
many different implementation approaches, to our
knowledge no comprehensive quantitative study of this
design space exists. The lack of such a study means
that implementers must rely on `conventional wisdom',
hearsay, and their own costly analysis. Starting with
mechanisms described in the literature and a variety of
permutations of these, we explore the impact of a
number of dimensions including: (a) the choice of data
structure, (b) levels of indirection from object to
metadata, and (c) specialization of scanning code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gu:2011:TPL,
author = "Xiaoming Gu and Chen Ding",
title = "On the theory and potential of {LRU--MRU}
collaborative cache management",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "43--54",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993485",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "The goal of cache management is to maximize data
reuse. Collaborative caching provides an interface for
software to communicate access information to hardware.
In theory, it can obtain optimal cache performance. In
this paper, we study a collaborative caching system
that allows a program to choose different caching
methods for its data. As an interface, it may be used
in arbitrary ways, sometimes optimal but probably
                 suboptimal most times and even counterproductive. We
develop a theoretical foundation for collaborative
caches to show the inclusion principle and the
existence of a distance metric we call LRU-MRU stack
distance. The new stack distance is important for
program analysis and transformation to target a
hierarchical collaborative cache system rather than a
single cache configuration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Afek:2011:CIA,
author = "Yehuda Afek and Dave Dice and Adam Morrison",
title = "Cache index-aware memory allocation",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "55--64",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993486",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "Poor placement of data blocks in memory may negatively
impact application performance because of an increase
in the cache conflict miss rate [18]. For dynamically
allocated structures this placement is typically
determined by the memory allocator. Cache
index-oblivious allocators may inadvertently place
blocks on a restricted fraction of the available cache
indexes, artificially and needlessly increasing the
conflict miss rate. While some allocators are less
                 vulnerable to this phenomenon, no general-purpose malloc
allocator is index-aware and methodologically addresses
this concern. We demonstrate that many existing
state-of-the-art allocators are index-oblivious,
admitting performance pathologies for certain block
sizes. We show that a simple adjustment within the
allocator to control the spacing of blocks can provide
better index coverage, which in turn reduces the
superfluous conflict miss rate in various applications,
improving performance with no observed negative
consequ",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hertz:2011:WWR,
author = "Matthew Hertz and Stephen Kane and Elizabeth Keudel
and Tongxin Bai and Chen Ding and Xiaoming Gu and
Jonathan E. Bard",
title = "Waste not, want not: resource-based garbage collection
in a shared environment",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "65--76",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993487",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "To achieve optimal performance, garbage-collected
applications must balance the sizes of their heaps
dynamically. Sizing the heap too small can reduce
throughput by increasing the number of garbage
collections that must be performed. Too large a heap,
however, can cause the system to page and drag down the
overall throughput. In today's multicore,
multiprocessor machines, multiple garbage-collected
applications may run simultaneously. As a result, each
virtual machine (VM) must adjust its memory demands to
reflect not only the behavior of the application it is
running, but also the behavior of the peer applications
running on the system. We present a memory management
system that enables VMs to react to memory demands
dynamically.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mutlu:2011:MSM,
author = "Onur Mutlu",
title = "Memory systems in the many-core era: challenges,
opportunities, and solution directions",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "77--78",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993489",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "The memory subsystem is a fundamental performance and
energy bottleneck in almost all computing systems.
Recent trends towards increasingly more cores on die,
consolidation of diverse workloads on a single chip,
and difficulty of DRAM scaling impose new requirements
and exacerbate old demands on the memory system. In
particular, the need for memory bandwidth and capacity
                 is increasing [14], applications' interference in the
                 memory system increasingly limits system performance
                 and makes the system hard to control [12], memory
                 energy and power are key design concerns [8], and DRAM
                 technology consumes a significant amount of energy and
does not scale down easily to smaller technology nodes
[7].",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tene:2011:CCC,
author = "Gil Tene and Balaji Iyengar and Michael Wolf",
title = "{C4}: the continuously concurrent compacting
collector",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "79--88",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993491",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "C4, the Continuously Concurrent Compacting Collector,
an updated generational form of the Pauseless GC
Algorithm [7], is introduced and described, along with
details of its implementation on modern X86 hardware.
                 It uses a read barrier to support concurrent
compaction, concurrent remapping, and concurrent
incremental update tracing. C4 differentiates itself
from other generational garbage collectors by
supporting simultaneous-generational concurrency: the
different generations are collected using concurrent
(non stop-the-world) mechanisms that can be
simultaneously and independently active. C4 is able to
continuously perform concurrent young generation
collections, even during long periods of concurrent
full heap collection, allowing C4 to sustain high
allocation rates and maintain the efficiency typical to
generational collectors, without sacrificing response
times or reverting to stop-the-world operation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kalibera:2011:HRO,
author = "Tomas Kalibera and Richard Jones",
title = "Handles revisited: optimising performance and memory
costs in a real-time collector",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "89--98",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993492",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "Compacting garbage collectors must update all
references to objects they move. Updating is a lengthy
operation but the updates must be transparent to the
mutator. The consequence is that no space can be
reclaimed until all references have been updated which,
in a real-time collector, must be done incrementally.
One solution is to replace direct references to objects
with handles. Handles offer several advantages to a
real-time collector. They eliminate the updating
problem. They allow immediate reuse of the space used
by evacuated objects. They incur no copy reserve
overhead. However, the execution time overhead of
handles has led to them being abandoned by most modern
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Aigner:2011:STM,
author = "Martin Aigner and Andreas Haas and Christoph M. Kirsch
and Michael Lippautz and Ana Sokolova and Stephanie
Stroka and Andreas Unterweger",
title = "Short-term memory for self-collecting mutators",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "99--108",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993493",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "We propose a new memory model called short-term memory
for managing objects on the heap. In contrast to the
traditional persistent memory model for heap
management, objects in short-term memory expire after a
finite amount of time, which makes deallocation
unnecessary. Instead, expiration of objects may be
extended, if necessary, by refreshing. We have
developed a concurrent, incremental, and non-moving
implementation of short-term memory for explicit
refreshing called self-collecting mutators that is
based on programmer-controlled time and integrated into
state-of-the-art runtimes of three programming
languages: C, Java, and Go. All memory management
operations run in constant time without acquiring any
locks modulo the underlying allocators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Singer:2011:GCA,
author = "Jeremy Singer and George Kovoor and Gavin Brown and
Mikel Luj{\'a}n",
title = "Garbage collection auto-tuning for {Java} {MapReduce}
on multi-cores",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "109--118",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993495",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "MapReduce has been widely accepted as a simple
programming pattern that can form the basis for
efficient, large-scale, distributed data processing.
The success of the MapReduce pattern has led to a
variety of implementations for different computational
scenarios. In this paper we present MRJ, a MapReduce
Java framework for multi-core architectures. We
evaluate its scalability on a four-core, hyperthreaded
Intel Core i7 processor, using a set of standard
MapReduce benchmarks. We investigate the significant
impact that Java runtime garbage collection has on the
performance and scalability of MRJ. We propose the use
of memory management auto-tuning techniques based on
machine learning.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
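
MRJ itself is a Java framework; as a language-neutral reminder of the
MapReduce pattern the abstract builds on, a minimal sequential skeleton
in Haskell:

import qualified Data.Map as M

-- the mapper emits key/value pairs; values are grouped by key and
-- then reduced. The intermediate `groups` structure is exactly the
-- kind of short-lived heap data whose collection such a framework
-- puts pressure on.
mapReduce :: Ord k2 => (k1 -> v1 -> [(k2, v2)])   -- mapper
          -> (k2 -> [v2] -> v3)                   -- reducer
          -> [(k1, v1)] -> M.Map k2 v3
mapReduce mapper reducer input = M.mapWithKey reducer groups
  where
    groups = M.fromListWith (++)
               [ (k2, [v2]) | (k1, v1) <- input, (k2, v2) <- mapper k1 v1 ]
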
@Article{Wagner:2011:CMM,
author = "Gregor Wagner and Andreas Gal and Christian Wimmer and
Brendan Eich and Michael Franz",
title = "Compartmental memory management in a modern {Web}
browser",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "119--128",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993496",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "Since their inception, the usage pattern of web
browsers has changed substantially. Rather than
sequentially navigating static web sites, modern web
browsers often manage a large number of simultaneous
tabs displaying dynamic web content, each of which
might be running a substantial amount of client-side
JavaScript code. This environment introduced a new
degree of parallelism that was not fully embraced by
the underlying JavaScript virtual machine architecture.
We propose a novel abstraction for multiple disjoint
JavaScript heaps, which we call compartments. We use
the notion of document origin to cluster objects into
separate compartments. Objects within a compartment can
reference each other directly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
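
A sketch of the origin-keyed clustering the abstract describes (a toy
model, not the browser's implementation):

import Data.Map (Map)
import qualified Data.Map as M

type Origin = String
newtype Compartment obj = Compartment [obj]  -- one disjoint heap per origin

-- allocation is routed to the compartment of the allocating document's
-- origin; objects within one compartment may reference each other directly
allocate :: Origin -> obj -> Map Origin (Compartment obj)
         -> Map Origin (Compartment obj)
allocate origin o =
  M.insertWith (\(Compartment new) (Compartment old) ->
                  Compartment (new ++ old))
               origin (Compartment [o])
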
@Article{Tarau:2011:IST,
author = "Paul Tarau",
title = "Integrated symbol table, engine and heap memory
management in multi-engine {Prolog}",
journal = j-SIGPLAN,
volume = "46",
number = "11",
pages = "129--138",
month = nov,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2076022.1993497",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Dec 15 07:46:57 MST 2011",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '11 conference proceedings.",
abstract = "We describe an integrated solution to symbol, heap and
logic engine memory management in a context where
exchanges of arbitrary Prolog terms occur between
multiple dynamically created engines, implemented in a
new Java-based experimental Prolog system. As our
symbols represent not just Prolog atoms, but also
handles to Java objects (including arbitrary size
integers and decimals), everything is centered around a
symbol garbage collection algorithm ensuring that
external objects are shared and exchanged between logic
engines efficiently. Taking advantage of a tag-on-data
heap representation of Prolog terms, our algorithm
performs in-place updates of live symbol references
directly on heap cells.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Petricek:2011:EMP,
author = "Tomas Petricek and Alan Mycroft and Don Syme",
title = "Extending monads with pattern matching",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "1--12",
month = dec,
year = "2011",
DOI = "https://doi.org/10.1145/2096148.2034677",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Sequencing of effectful computations can be neatly
captured using monads and elegantly written using do
notation. In practice such monads often allow
additional ways of composing computations, which have
to be written explicitly using combinators. We identify
joinads, an abstract notion of computation that is
stronger than monads and captures many such ad-hoc
extensions. In particular, joinads are monads with
three additional operations: one of type $m a \to m b
\to m (a, b)$ that captures various forms of parallel
composition, one of type $m a \to m a \to m a$ that is
inspired by choice, and one of type $m a \to m (m a)$
that captures aliasing of computations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
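
The three operations named in the abstract transcribe directly into a
Haskell type class; the Maybe instance below is an illustrative
assumption, not taken from the paper:

class Monad m => Joinad m where
  mzip    :: m a -> m b -> m (a, b)   -- parallel composition
  morelse :: m a -> m a -> m a        -- choice
  malias  :: m a -> m (m a)           -- aliasing

instance Joinad Maybe where
  mzip ma mb          = (,) <$> ma <*> mb
  morelse (Just a) _  = Just a
  morelse Nothing  mb = mb
  malias              = fmap return
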
@Article{Giorgidze:2011:BBM,
author = "George Giorgidze and Torsten Grust and Nils
Schweinsberg and Jeroen Weijers",
title = "Bringing back monad comprehensions",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "13--22",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034678",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper is about a Glasgow Haskell Compiler (GHC)
extension that generalises Haskell's list comprehension
notation to monads. The monad comprehension notation
implemented by the extension supports generator and
filter clauses, as was the case in the Haskell 1.4
standard. In addition, the extension generalises the
recently proposed parallel and SQL-like list
comprehension notations to monads. The aforementioned
generalisations are formally defined in this paper. The
extension will be available in GHC 7.2. This paper
gives several instructive examples that we hope will
facilitate wide adoption of the extension by the
Haskell community. We also argue why the do notation is
not always a good fit for monadic libraries and
embedded domain-specific languages, especially for
those that are based on collection monads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
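
With the extension enabled, one comprehension works across monads:
generators desugar to (>>=) and filter clauses to `guard`, so the
example below runs in Maybe rather than lists.

{-# LANGUAGE MonadComprehensions #-}

addPositives :: Maybe Int -> Maybe Int -> Maybe Int
addPositives mx my = [ x + y | x <- mx, y <- my, x > 0, y > 0 ]

-- addPositives (Just 1) (Just 2) == Just 3
-- addPositives (Just 1) (Just 0) == Nothing   (the filter fails)
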
@Article{Bolingbroke:2011:TCF,
author = "Maximilian Bolingbroke and Simon Peyton Jones and
Dimitrios Vytiniotis",
title = "Termination combinators forever",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "23--34",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034680",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a library-based approach to constructing
termination tests suitable for controlling termination
of symbolic methods such as partial evaluation,
supercompilation and theorem proving. With our
combinators, all termination tests are correct by
construction. We show how the library can be designed
to embody various optimisations of the termination
tests, which the user of the library takes advantage of
entirely transparently.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
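
A condensed sketch of the idea (illustrative names, not the paper's
library): tests are built compositionally from well-quasi-orders, so
every infinite sequence of states is eventually flagged.

-- a test carries a well-quasi-order; a symbolic process stops when a
-- new state sits `wqo`-above something already in its history
newtype TTest a = TTest (a -> a -> Bool)

natT :: TTest Int                            -- (<=) is a wqo on naturals
natT = TTest (<=)

pairT :: TTest a -> TTest b -> TTest (a, b)  -- wqos close under products
pairT (TTest f) (TTest g) = TTest (\(a, b) (a', b') -> f a a' && g b b')

shouldStop :: TTest a -> [a] -> a -> Bool    -- history, candidate state
shouldStop (TTest wqo) history x = any (`wqo` x) history
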
@Article{Westbrook:2011:HHL,
author = "Edwin Westbrook and Nicolas Frisby and Paul Brauner",
title = "{Hobbits} for {Haskell}: a library for higher-order
encodings in functional programming languages",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "35--46",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034681",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Adequate encodings are a powerful programming tool,
which eliminate whole classes of program bugs: they
ensure that a program cannot generate ill-formed data,
because such data is not part of the representation;
and they also ensure that a program is well-defined,
meaning that it cannot have different behaviors on
different representations of the same piece of data.
Unfortunately, it has proven difficult to define
adequate encodings of programming languages themselves.
Such encodings would be very useful in language
processing tools such as interpreters, compilers,
model-checking tools, etc., as these systems are often
difficult to get correct. The key problem in
representing programming languages is in encoding
binding constructs; previous approaches have serious
limitations in either the operations they allow or the
correctness guarantees they make.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
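
For context, one earlier higher-order encoding in this line is
parametric higher-order abstract syntax (PHOAS), sketched below; the
abstract's point is that such encodings restrict either the available
operations or the guarantees, which the paper's name-binding library
aims to fix.

{-# LANGUAGE RankNTypes #-}

-- binding is represented by a host-language function
data Term v = Var v
            | App (Term v) (Term v)
            | Lam (v -> Term v)

-- keeping terms polymorphic in v rules out exotic, ill-formed terms
type Closed = forall v. Term v

identityFn :: Closed
identityFn = Lam Var

applyFn :: Closed
applyFn = Lam (\f -> Lam (\x -> App (Var f) (Var x)))
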
@Article{Harper:2011:LWG,
author = "Thomas Harper",
title = "A library writer's guide to shortcut fusion",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "47--58",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034682",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There are now a variety of shortcut fusion techniques
in the wild for removing intermediate data structures
in Haskell. They are often presented, however,
specialised to a specific data structure and interface.
This can make it difficult to transfer these techniques
to other settings. In this paper, we give a roadmap for
a library writer who would like to implement fusion for
his own library. We explain shortcut fusion without
reference to any specific implementation by treating it
as an instance of data refinement.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
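
The canonical instance of shortcut fusion is the foldr/build rule,
shown here as a reminder of the shape the paper generalises:

{-# LANGUAGE RankNTypes #-}

-- producers abstract over the list constructors...
build :: (forall b. (a -> b -> b) -> b -> b) -> [a]
build g = g (:) []

-- ...so a consumer written with foldr fuses away the list.
-- GHC applies the law as a rewrite rule (as in GHC.Base):
--   "foldr/build"  forall k z g.  foldr k z (build g) = g k z

upto :: Int -> [Int]                     -- a producer in build form
upto n = build (\cons nil ->
                  let go i = if i > n then nil else i `cons` go (i + 1)
                  in go 1)

sumUpto :: Int -> Int                    -- fuses: no intermediate list
sumUpto n = foldr (+) 0 (upto n)
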
@Article{Lippmeier:2011:EPS,
author = "Ben Lippmeier and Gabriele Keller",
title = "Efficient parallel stencil convolution in {Haskell}",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "59--70",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034684",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Stencil convolution is a fundamental building block of
many scientific and image processing algorithms. We
present a declarative approach to writing such
convolutions in Haskell that is both efficient at
runtime and implicitly parallel. To achieve this we
extend our prior work on the Repa array library with
two new features: partitioned and cursored arrays.
Combined with careful management of the interaction
between GHC and its back-end code generator LLVM, we
achieve performance comparable to the standard OpenCV
library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
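
As a baseline illustration of the operation itself (plain lists, not
the Repa API the paper extends), a one-dimensional stencil convolution:

-- each output point is the kernel-weighted sum of a sliding window
stencil1D :: Num a => [a] -> [a] -> [a]
stencil1D kernel xs =
  [ sum (zipWith (*) kernel window) | window <- windows (length kernel) xs ]
  where
    windows n ys
      | length ys < n = []
      | otherwise     = take n ys : windows n (tail ys)

-- a three-point smoothing kernel:
--   stencil1D [0.25, 0.5, 0.25] [1,2,3,4,5 :: Double] == [2.0,3.0,4.0]
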
@Article{Marlow:2011:MDP,
author = "Simon Marlow and Ryan Newton and Simon Peyton Jones",
title = "A monad for deterministic parallelism",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "71--82",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034685",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new programming model for deterministic
parallel computation in a pure functional language. The
model is monadic and has explicit granularity, but
allows dynamic construction of dataflow networks that
are scheduled at runtime, while remaining deterministic
and pure. The implementation is based on monadic
concurrency, which has until now only been used to
simulate concurrency in functional languages, rather
than to provide parallelism. We present the API with
its semantics, and argue that parallel execution is
deterministic.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
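
A small example in the style of the paper's API (Control.Monad.Par
from the accompanying monad-par package): fork builds the dataflow
graph, IVars carry results, and runPar is observably pure.

import Control.Monad.Par

parSum :: [Int] -> [Int] -> Int
parSum xs ys = runPar $ do
  ix <- new                      -- IVars: write-once communication
  iy <- new
  fork (put ix (sum xs))         -- two independent graph nodes
  fork (put iy (sum ys))
  a <- get ix                    -- get blocks until the IVar is full
  b <- get iy
  return (a + b)
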
@Article{Leijen:2011:PCP,
author = "Daan Leijen and Manuel F{\"a}hndrich and Sebastian
Burckhardt",
title = "Prettier concurrency: purely functional concurrent
revisions",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "83--94",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034686",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This article presents an extension to the work of
Launchbury and Peyton Jones on the ST monad. Using a
novel model for concurrency, called concurrent
revisions [3,5], we show how we can use concurrency
together with imperative mutable variables, while still
being able to safely convert such computations (in the
Rev monad) into pure values again. In contrast to many
other transaction models, like software transactional
memory (STM), concurrent revisions never use rollback
and always deterministically resolve conflicts. As a
consequence, concurrent revisions integrate well with
side-effecting I/O operations. Using deterministic
conflict resolution, concurrent revisions can deal well
with situations where there are many conflicts between
different threads that modify a shared data
structure.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
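
A toy model of the revision discipline (illustrative names, not the
paper's Rev monad): each revision works on a snapshot and records its
writes, and join merges them back under a fixed, deterministic policy.

import Data.Map (Map)
import qualified Data.Map as M

data Rev = Rev { store :: Map String Int, written :: [String] }

forkRev :: Rev -> Rev                    -- child starts from a snapshot
forkRev parent = Rev (store parent) []

writeVar :: String -> Int -> Rev -> Rev
writeVar k v (Rev s w) = Rev (M.insert k v s) (k : w)

-- deterministic merge, never a rollback: where both revisions wrote,
-- the joined child's value wins (one possible merge policy)
joinRev :: Rev -> Rev -> Rev
joinRev main child =
  Rev (foldr (\k s -> M.insert k (store child M.! k) s)
             (store main) (written child))
      (written main ++ written child)
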
@Article{Stefan:2011:FDI,
author = "Deian Stefan and Alejandro Russo and John C. Mitchell
and David Mazi{\`e}res",
title = "Flexible dynamic information flow control in
{Haskell}",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "95--106",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034688",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a new, dynamic, floating-label approach to
language-based information flow control, and present an
implementation in Haskell. A labeled IO monad, LIO,
keeps track of a current label and permits restricted
access to IO functionality, while ensuring that the
current label exceeds the labels of all data observed
and restricts what can be modified. Unlike other
language-based work, LIO also bounds the current label
with a current clearance that provides a form of
discretionary access control. In addition, programs may
encapsulate and pass around the results of computations
with different labels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
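
A reduced model of the floating current label (a two-point lattice and
illustrative names; the actual library generalises the label type and
adds the clearance bound the abstract mentions):

data Label = Public | Secret deriving (Eq, Ord, Show)

data Labeled a = Labeled Label a

-- the monad threads the current label, which rises on observation
newtype LIO a = LIO { runLIO :: Label -> (a, Label) }

instance Functor LIO where
  fmap f (LIO g) = LIO $ \l -> let (a, l') = g l in (f a, l')

instance Applicative LIO where
  pure a = LIO $ \l -> (a, l)
  LIO gf <*> LIO ga = LIO $ \l ->
    let (f, l1) = gf l; (a, l2) = ga l1 in (f a, l2)

instance Monad LIO where
  LIO g >>= k = LIO $ \l -> let (a, l') = g l in runLIO (k a) l'

unlabel :: Labeled a -> LIO a            -- observing raises the label
unlabel (Labeled l a) = LIO $ \cur -> (a, max cur l)
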
@Article{Duregaard:2011:EPG,
author = "Jonas Dureg{\aa}rd and Patrik Jansson",
title = "Embedded parser generators",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "107--117",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034689",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a novel method of embedding context-free
grammars in Haskell and of automatically generating
parsers and pretty-printers from them. We have
implemented this method in a library called BNFC-meta
(from the BNF Converter, which it is built on). The
library builds compiler front ends using
metaprogramming instead of conventional code
generation. Parsers are built from labelled BNF
grammars that are defined directly in Haskell modules.
Our solution combines features of parser generators
(static grammar checks, a highly specialised grammar
DSL) and adds several features that are otherwise
exclusive to combinatory libraries such as the ability
to reuse, parameterise and generate grammars inside
Haskell.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
@Article{Epstein:2011:THC,
author = "Jeff Epstein and Andrew P. Black and Simon
Peyton Jones",
title = "Towards {Haskell} in the cloud",
journal = j-SIGPLAN,
volume = "46",
number = "12",
pages = "118--129",
month = dec,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2096148.2034690",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Jan 17 17:51:46 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Cloud Haskell, a domain-specific language
for developing programs for a distributed computing
environment. Implemented as a shallow embedding in
Haskell, it provides a message-passing communication
model, inspired by Erlang, without introducing
incompatibility with Haskell's established
shared-memory concurrency. A key contribution is a
method for serializing function closures for
transmission across the network. Cloud Haskell has been
implemented; we present example code and some
preliminary performance measurements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '11 conference proceedings.",
}
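
In the interface the paper describes (processes in a ProcessM monad
exchanging Serializable messages), a round trip looks roughly like the
following; the names are approximations of the paper's API and the
fragment is not self-contained.

pingPong :: ProcessId -> ProcessM ()
pingPong peer = do
  self <- getSelfPid           -- ProcessIds are Serializable...
  send peer self               -- ...so they can cross the network
  msg <- expect                -- blocks for a typed message
  send peer (msg :: String)    -- echo it back to the peer
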
@Article{Black:2012:PSD,
author = "Andrew P. Black and Peter W. O'Hearn",
title = "Presentation of the {SIGPLAN} distinguished
achievement award to {Sir Charles Antony Richard Hoare,
FRS, FREng, FBCS}; and interview",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "1--2",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103658",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Hoare:2012:MTR,
author = "Tony Hoare",
title = "Message of thanks: on the receipt of the {2011 ACM
SIGPLAN} distinguished achievement award",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "3--6",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103659",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{vanStaden:2012:F,
author = "Stephan van Staden and Cristiano Calcagno and Bertrand
Meyer",
title = "Freefinement",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "7--18",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103661",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Freefinement is an algorithm that constructs a sound
refinement calculus from a verification system under
certain conditions. In this paper, a verification
system is any formal system for establishing whether an
inductively defined term, typically a program,
satisfies a specification. Examples of verification
systems include Hoare logics and type systems.
Freefinement first extends the term language to include
specification terms, and builds a verification system
for the extended language that is a sound and
conservative extension of the original system. The
extended system is then transformed into a sound
refinement calculus. The resulting refinement calculus
can interoperate closely with the verification system
--- it is even possible to reuse and translate proofs
between them. Freefinement gives a semantics to
refinement at an abstract level: it associates each
term of the extended language with a set of terms from
the original language, and refinement simply reduces
this set. The paper applies freefinement to a simple
type system for the lambda calculus and also to a Hoare
logic.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Joshi:2012:UHI,
author = "Saurabh Joshi and Shuvendu K. Lahiri and Akash Lal",
title = "Underspecified harnesses and interleaved bugs",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "19--30",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103662",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Static assertion checking of open programs requires
setting up a precise harness to capture the environment
assumptions. For instance, a library may require a file
handle to be properly initialized before it is passed
into it. A harness is used to set up or specify the
appropriate preconditions before invoking methods from
the program. In the absence of a precise harness, even
the most precise automated static checkers are bound to
report numerous false alarms. This often limits the
adoption of static assertion checking in the hands of a
user. In this work, we explore the possibility of
automatically filtering away (or prioritizing) warnings
that result from imprecision in the harness. We limit
our attention to the scenario when one is interested in
finding bugs due to concurrency. We define a warning to
be an interleaved bug when it manifests on an input for
which no sequential interleaving produces a warning. As
we argue in the paper, limiting a static analysis to
only consider interleaved bugs greatly reduces false
positives during static concurrency analysis in the
presence of an imprecise harness. We formalize
interleaved bugs as a differential analysis between the
original program and its sequential version and provide
various techniques for finding them. Our implementation
CBugs demonstrates that the scheme of finding
interleaved bugs can alleviate the need to construct
precise harnesses while checking real-life concurrent
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Gardner:2012:TPL,
author = "Philippa Anne Gardner and Sergio Maffeis and Gareth
David Smith",
title = "Towards a program logic for {JavaScript}",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "31--44",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103663",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "JavaScript has become the most widely used language
for client-side web programming. The dynamic nature of
JavaScript makes understanding its code notoriously
difficult, leading to buggy programs and a lack of
adequate static-analysis tools. We believe that logical
reasoning has much to offer JavaScript: a simple
description of program behaviour, a clear understanding
of module boundaries, and the ability to verify
security contracts. We introduce a program logic for
reasoning about a broad subset of JavaScript, including
challenging features such as prototype inheritance and
`with'. We adapt ideas from separation logic to provide
tractable reasoning about JavaScript code: reasoning
about easy programs is easy; reasoning about hard
programs is possible. We prove a strong soundness
result. All libraries written in our subset and proved
correct with respect to their specifications will be
well-behaved, even when called by arbitrary JavaScript
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Krishnaswami:2012:HOF,
author = "Neelakantan R. Krishnaswami and Nick Benton and Jan
Hoffmann",
title = "Higher-order functional reactive programming in
bounded space",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "45--58",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103665",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Functional reactive programming (FRP) is an elegant
and successful approach to programming reactive systems
declaratively. The high levels of abstraction and
expressivity that make FRP attractive as a programming
model do, however, often lead to programs whose
resource usage is excessive and hard to predict. In
this paper, we address the problem of space leaks in
discrete-time functional reactive programs. We present
a functional reactive programming language that
statically bounds the size of the dataflow graph a
reactive program creates, while still permitting use of
higher-order functions and higher-type streams such as
streams of streams. We achieve this with a novel linear
type theory that both controls allocation and ensures
that all recursive definitions are well-founded. We
also give a denotational semantics for our language by
combining recent work on metric spaces for the
interpretation of higher-order causal functions with
length-space models of space-bounded computation. The
resulting category is doubly closed and hence forms a
model of the logic of bunched implications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Hur:2012:MBK,
author = "Chung-Kil Hur and Derek Dreyer and Georg Neis and
Viktor Vafeiadis",
title = "The marriage of bisimulations and {Kripke} logical
relations",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "59--72",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103666",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There has been great progress in recent years on
developing effective techniques for reasoning about
program equivalence in ML-like languages---that is,
languages that combine features like higher-order
functions, recursive types, abstract types, and general
mutable references. Two of the most prominent types of
techniques to have emerged are *bisimulations* and
*Kripke logical relations (KLRs)*. While both
approaches are powerful, their complementary advantages
have led us and other researchers to wonder whether
there is an essential tradeoff between them.
Furthermore, both approaches seem to suffer from
fundamental limitations if one is interested in scaling
them to inter-language reasoning. In this paper, we
propose *relation transition systems (RTSs)*, which
marry together some of the most appealing aspects of
KLRs and bisimulations. In particular, RTSs show how
bisimulations' support for reasoning about recursive
features via *coinduction* can be synthesized with
KLRs' support for reasoning about local state via
*state transition systems*. Moreover, we have designed
RTSs to avoid the limitations of KLRs and bisimulations
that preclude their generalization to inter-language
reasoning. Notably, unlike KLRs, RTSs are transitively
composable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{James:2012:IE,
author = "Roshan P. James and Amr Sabry",
title = "Information effects",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "73--84",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103667",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computation is a physical process which, like all
other physical processes, is fundamentally reversible.
From the notion of type isomorphisms, we derive a
typed, universal, and reversible computational model in
which information is treated as a linear resource that
can neither be duplicated nor erased. We use this model
as a semantic foundation for computation and show that
the `gap' between conventional irreversible computation
and logically reversible computation can be captured by
a type-and-effect system. Our type-and-effect system is
structured as an arrow metalanguage that exposes
creation and erasure of information as explicit effect
operations. Irreversible computations arise from
interactions with an implicit information environment,
thus making them a derived notion, much like open
systems in physics. We sketch several applications
which can benefit from an explicit treatment of
information effects, such as quantitative
information-flow security and differential privacy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
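
The starting point, types-as-isomorphisms, can be sketched directly;
note that a projection such as fst cannot be given this type, and that
missing arrow is precisely the erasure the paper turns into an explicit
effect.

-- every primitive is invertible: information is a linear resource
data Iso a b = Iso { to :: a -> b, from :: b -> a }

inv :: Iso a b -> Iso b a
inv (Iso f g) = Iso g f

compose :: Iso a b -> Iso b c -> Iso a c
compose (Iso f g) (Iso f' g') = Iso (f' . f) (g . g')

swapP :: Iso (a, b) (b, a)
swapP = Iso (\(a, b) -> (b, a)) (\(b, a) -> (a, b))

-- no total `Iso (a, b) a` exists: its inverse would have to invent
-- the discarded b. Duplication fails symmetrically.
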
@Article{Yang:2012:LAE,
author = "Jean Yang and Kuat Yessenov and Armando Solar-Lezama",
title = "A language for automatically enforcing privacy
policies",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "85--96",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103669",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is becoming increasingly important for applications
to protect sensitive data. With current techniques, the
programmer bears the burden of ensuring that the
application's behavior adheres to policies about where
sensitive values may flow. Unfortunately, privacy
policies are difficult to manage because their global
nature requires coordinated reasoning and enforcement.
To address this problem, we describe a programming
model that makes the system responsible for ensuring
adherence to privacy policies. The programming model
has two components: (1) core programs describing
functionality independent of privacy concerns and (2)
declarative, decentralized policies controlling how
sensitive values are disclosed. Each sensitive value
encapsulates multiple views; policies describe which
views are allowed based on the output context. The
system is responsible for automatically ensuring that
outputs are consistent with the policies. We have
implemented this programming model in a new functional
constraint language named Jeeves. In Jeeves, sensitive
values are introduced as symbolic variables and
policies correspond to constraints that are resolved at
output channels. We have implemented Jeeves as a Scala
library using an SMT solver as a model finder. In this
paper we describe the dynamic and static semantics of
Jeeves and the properties about policy enforcement that
the semantics guarantees. We also describe our
experience implementing a conference management system
and a social network.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
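
A schematic of the two components the abstract names, with illustrative
types (Jeeves itself is embedded in Scala and resolves policies with an
SMT solver, which this sketch elides):

-- (1) a sensitive value encapsulates multiple views
data Sensitive a = Sensitive { secretView :: a, publicView :: a }

-- (2) a declarative policy chooses a view from the output context
type Policy ctx a = ctx -> Sensitive a -> a

-- the system, not the core program, applies the policy at the channel
output :: Policy ctx a -> ctx -> Sensitive a -> a
output policy ctx v = policy ctx v

ownerOnly :: String -> Policy String a   -- only the owner sees secrets
ownerOnly owner viewer v
  | viewer == owner = secretView v
  | otherwise       = publicView v
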
@Article{Barthe:2012:PRR,
author = "Gilles Barthe and Boris K{\"o}pf and Federico Olmedo
and Santiago Zanella B{\'e}guelin",
title = "Probabilistic relational reasoning for differential
privacy",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "97--110",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103670",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Differential privacy is a notion of confidentiality
that protects the privacy of individuals while allowing
useful computations on their private data. Deriving
differential privacy guarantees for real programs is a
difficult and error-prone task that calls for
principled approaches and tool support. Approaches
based on linear types and static analysis have recently
emerged; however, an increasing number of programs
achieve privacy using techniques that cannot be
analyzed by these approaches. Examples include programs
that aim for weaker, approximate differential privacy
guarantees, programs that use the Exponential
mechanism, and randomized programs that achieve
differential privacy without using any standard
mechanism. Providing support for reasoning about the
privacy of such programs has been an open problem. We
report on CertiPriv, a machine-checked framework for
reasoning about differential privacy built on top of
the Coq proof assistant. The central component of
CertiPriv is a quantitative extension of a
probabilistic relational Hoare logic that enables one
to derive differential privacy guarantees for programs
from first principles. We demonstrate the
expressiveness of CertiPriv using a number of examples
whose formal analysis is out of the reach of previous
techniques. In particular, we provide the first
machine-checked proofs of correctness of the Laplacian
and Exponential mechanisms and of the privacy of
randomized and streaming algorithms from the recent
literature.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
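
The property being certified is standard $\epsilon$-differential
privacy; in the usual formulation (not CertiPriv's Coq encoding), a
randomized computation $M$ is $\epsilon$-differentially private when,
for all databases $d$, $d'$ differing in one individual's record and
every set $S$ of outputs,

\[
  \Pr[M(d) \in S] \;\le\; e^{\epsilon}\,\Pr[M(d') \in S].
\]

The Laplace mechanism realises this for a numeric query $f$ of
sensitivity $\Delta f$ by releasing $f(d) + \mathrm{Lap}(\Delta f/\epsilon)$.
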
@Article{Heidegger:2012:APC,
author = "Phillip Heidegger and Annette Bieniusa and Peter
Thiemann",
title = "Access permission contracts for scripting languages",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "111--122",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103671",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The ideal software contract fully specifies the
behavior of an operation. Often, in particular in the
context of scripting languages, a full specification
may be cumbersome to state and may not even be desired.
In such cases, a partial specification, which describes
selected aspects of the behavior, may be used to raise
the confidence in an implementation of the operation to
a reasonable level. We propose a novel kind of contract
for object-based languages that specifies the side
effects of an operation with access permissions. An
access permission contract uses sets of access paths to
express read and write permissions for the properties
of the objects accessible from the operation. We
specify a monitoring semantics for access permission
contracts and implement this semantics in a contract
system for JavaScript. We prove soundness and stability
of violation under increasing aliasing for our
semantics. Applications of access permission contracts
include enforcing modularity, test-driven development,
program understanding, and regression testing. With
respect to testing and understanding, we find that
adding access permissions to contracts increases the
effectiveness of error detection through contract
monitoring by 6--13\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Madhusudan:2012:RPI,
author = "Parthasarathy Madhusudan and Xiaokang Qiu and Andrei
Stefanescu",
title = "Recursive proofs for inductive tree data-structures",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "123--136",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103673",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We develop logical mechanisms and procedures to
facilitate the verification of full functional
properties of inductive tree data-structures using
recursion; these procedures are sound, incomplete, but terminating.
Our contribution rests in a new extension of
first-order logic with recursive definitions called
Dryad, a syntactical restriction on pre- and
post-conditions of recursive imperative programs using
Dryad, and a systematic methodology for accurately
unfolding the footprint on the heap uncovered by the
program that leads to finding simple recursive proofs
using formula abstraction and calls to SMT solvers. We
evaluate our methodology empirically and show that
several complex tree data-structure algorithms can be
checked against full functional specifications
automatically, given pre- and post-conditions. This
results in the first automatic terminating methodology
for proving a wide variety of annotated algorithms on
tree data-structures correct, including max-heaps,
treaps, red-black trees, AVL trees, binomial heaps, and
B-trees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
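
The recursive definitions Dryad is built around are ordinary structural
recursions over trees; the binary-search-tree property, written as
executable Haskell, is representative of the specifications the method
checks:

data Tree = Leaf | Node Tree Int Tree

keys :: Tree -> [Int]
keys Leaf         = []
keys (Node l k r) = keys l ++ [k] ++ keys r

-- a full functional property defined by recursion on the structure
isBST :: Tree -> Bool
isBST Leaf         = True
isBST (Node l k r) =
  all (< k) (keys l) && all (> k) (keys r) && isBST l && isBST r
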
@Article{Veanes:2012:SFS,
author = "Margus Veanes and Pieter Hooimeijer and Benjamin
Livshits and David Molnar and Nikolaj Bj{\o}rner",
title = "Symbolic finite state transducers: algorithms and
applications",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "137--150",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103674",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Finite automata and finite transducers are used in a
wide range of applications in software engineering,
from regular expressions to specification languages. We
extend these classic objects with symbolic alphabets
represented as parametric theories. Admitting
potentially infinite alphabets makes this
representation strictly more general and succinct than
classical finite transducers and automata over strings.
Despite this, the main operations, including
composition, checking that a transducer is
single-valued, and equivalence checking for
single-valued symbolic finite transducers are effective
given a decision procedure for the background theory.
We provide novel algorithms for these operations and
extend composition to symbolic transducers augmented
with registers. Our base algorithms are unusual in that
they are nonconstructive; therefore, we also supply a
separate model generation algorithm that can quickly
find counterexamples in the case two symbolic finite
transducers are not equivalent. The algorithms give
rise to a complete decidable algebra of symbolic
transducers. Unlike previous work, we do not need any
syntactic restriction of the formulas on the
transitions, only a decision procedure. In practice we
leverage recent advances in satisfiability modulo
theory (SMT) solvers. We demonstrate our techniques on
four case studies, covering a wide range of
applications. Our techniques can synthesize string
pre-images in excess of 8,000 bytes in roughly a
minute, and we find that our new encodings
significantly outperform previous techniques in
succinctness and speed of analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
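
A symbolic transducer differs from a classical one in that transitions
carry predicates and output functions over a possibly infinite
alphabet. In the Haskell sketch below, plain functions stand in for the
decidable theory formulas that make the paper's algorithms effective:

data STrans q a b = STrans
  { source :: q
  , guard  :: a -> Bool     -- symbolic guard over the alphabet
  , out    :: a -> [b]      -- symbolic output terms
  , target :: q
  }

run :: Eq q => [STrans q a b] -> q -> [a] -> Maybe [b]
run _  _ []       = Just []
run ts q (x : xs) =
  case [ t | t <- ts, source t == q, guard t x ] of
    (t : _) -> (out t x ++) <$> run ts (target t) xs
    []      -> Nothing
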
@Article{Koksal:2012:CC,
author = "Ali Sinan K{\"o}ksal and Viktor Kuncak and Philippe
Suter",
title = "Constraints as control",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "151--164",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103675",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an extension of Scala that supports
constraint programming over bounded and unbounded
domains. The resulting language, Kaplan, provides the
benefits of constraint programming while preserving the
existing features of Scala. Kaplan integrates
constraint and imperative programming by using
constraints as an advanced control structure; the
developers use the monadic 'for' construct to iterate
over the solutions of constraints or branch on the
existence of a solution. The constructs we introduce
have simple semantics that can be understood as
explicit enumeration of values, but are implemented
more efficiently using symbolic reasoning. Kaplan
programs can manipulate constraints at run-time, with
the combined benefits of type-safe syntax trees and
first-class functions. The language of constraints is a
functional subset of Scala, supporting arbitrary
recursive function definitions over algebraic data
types, sets, maps, and integers. Our implementation
runs on a platform combining a constraint solver with a
standard virtual machine. For constraint solving we use
an algorithm that handles recursive function
definitions through fair function unrolling and builds
upon the state-of-the art SMT solver Z3. We evaluate
Kaplan on examples ranging from enumeration of data
structures to execution of declarative specifications.
We found Kaplan promising because it is expressive,
supporting a range of problem domains, while enabling
full-speed execution of programs that do not rely on
constraint programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Austin:2012:MFD,
author = "Thomas H. Austin and Cormac Flanagan",
title = "Multiple facets for dynamic information flow",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "165--178",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103677",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "JavaScript has become a central technology of the web,
but it is also the source of many security problems,
including cross-site scripting attacks and malicious
advertising code. Central to these problems is the fact
that code from untrusted sources runs with full
privileges. We implement information flow controls in
Firefox to help prevent violations of data
confidentiality and integrity. Most previous
information flow techniques have primarily relied on
either static type systems, which are a poor fit for
JavaScript, or on dynamic analyses that sometimes get
stuck due to problematic implicit flows, even in
situations where the target web application correctly
satisfies the desired security policy. We introduce
faceted values, a new mechanism for providing
information flow security in a dynamic manner that
overcomes these limitations. Taking inspiration from
secure multi-execution, we use faceted values to
simultaneously and efficiently simulate multiple
executions for different security levels, thus
providing non-interference with minimal overhead, and
without the reliance on the stuck executions of prior
dynamic approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
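
The core data structure is small enough to sketch: a faceted value
pairs what a private observer sees with what a public one sees, and
computation proceeds on both facets at once.

type Principal = String

data Faceted a
  = Raw a
  | Facet Principal (Faceted a) (Faceted a)  -- < k ? private : public >

instance Functor Faceted where               -- compute on both facets
  fmap f (Raw a)         = Raw (f a)
  fmap f (Facet k hi lo) = Facet k (fmap f hi) (fmap f lo)

observe :: [Principal] -> Faceted a -> a     -- project for an observer
observe _    (Raw a) = a
observe auth (Facet k hi lo)
  | k `elem` auth = observe auth hi
  | otherwise     = observe auth lo
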
@Article{Ray:2012:DCI,
author = "Donald Ray and Jay Ligatti",
title = "Defining code-injection attacks",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "179--190",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103678",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper shows that existing definitions of
code-injection attacks (e.g., SQL-injection attacks)
are flawed. The flaws make it possible for attackers to
circumvent existing mechanisms, by supplying
code-injecting inputs that are not recognized as such.
The flaws also make it possible for benign inputs to be
treated as attacks. After describing these flaws in
conventional definitions of code-injection attacks,
this paper proposes a new definition, which is based on
whether the symbols input to an application get used as
(normal-form) values in the application's output.
Because values are already fully evaluated, they cannot
be considered `code' when injected. This simple new
definition of code-injection attacks avoids the
problems of existing definitions, improves our
understanding of how and when such attacks occur, and
enables us to evaluate the effectiveness of mechanisms
for mitigating such attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Basu:2012:DCR,
author = "Samik Basu and Tevfik Bultan and Meriem Ouederni",
title = "Deciding choreography realizability",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "191--202",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103680",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Since software systems are becoming increasingly more
concurrent and distributed, modeling and analysis of
interactions among their components is a crucial
problem. In several application domains, message-based
communication is used as the interaction mechanism, and
the communication contract among the components of the
system is specified semantically as a state machine. In
the service-oriented computing domain such
communication contracts are called `choreography'
specifications. A choreography specification identifies
allowable ordering of message exchanges in a
distributed system. A fundamental question about a
choreography specification is determining its
realizability, i.e., given a choreography
specification, is it possible to build a distributed
system that communicates exactly as the choreography
specifies? Checking realizability of choreography
specifications has been an open problem for several
years and it was not known if this was a decidable
problem. In this paper we give necessary and sufficient
conditions for realizability of choreographies. We
implemented the proposed realizability check and our
experiments show that it can efficiently determine the
realizability of (1) web service choreographies, (2)
Singularity OS channel contracts, and (3) UML
collaboration (communication) diagrams.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Bouajjani:2012:ARP,
author = "Ahmed Bouajjani and Michael Emmi",
title = "Analysis of recursively parallel programs",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "203--214",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103681",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a general formal model of isolated
hierarchical parallel computations, and identify
several fragments to match the concurrency constructs
present in real-world programming languages such as
Cilk and X10. By associating fundamental formal models
(vector addition systems with recursive transitions) to
each fragment, we provide a common platform for
exposing the relative difficulties of algorithmic
reasoning. For each case we measure the complexity of
deciding state-reachability for finite-data recursive
programs, and propose algorithms for the decidable
cases. The complexities which include PTIME, NP,
EXPSPACE, and 2EXPTIME contrast with undecidable
state-reachability for recursive multi-threaded
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Rexford:2012:PLP,
author = "Jennifer Rexford",
title = "Programming languages for programmable networks",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "215--216",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103683",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today's computer networks perform a bewildering array
of tasks, from routing and access control, to traffic
monitoring and load balancing. To support wireless
users accessing services hosted in the cloud,
enterprise and data-center networks are under
increasing pressure to support client mobility,
virtual-machine migration, resource isolation between
cloud services, and energy-efficient operation. Yet,
network administrators must configure the network
through closed and proprietary interfaces to
heterogeneous devices, such as routers, switches,
firewalls, load balancers, network address translators,
and intrusion detection systems. Not surprisingly,
configuring these complex networks is expensive and
error-prone, and innovation in network management
proceeds at a snail's pace. During the past several
years, the networking industry and research community
have pushed for greater openness in networking
software, and a clearer separation between networking
devices and the software that controls them. This broad
trend is known as Software Defined Networking (SDN). A
hallmark of SDN is having an open interface for
controller software running on a commodity computer to
install packet-processing rules in the underlying
switches. In particular, the OpenFlow protocol (see
www.openflow.org) has significant momentum. Many
commercial switches support OpenFlow, and a number of
campus, data-center, and backbone networks have
deployed the new technology. With the emergence of open
interfaces to network devices, the time is ripe to
rethink the design of network software, to put
networking on a stronger foundation and foster
innovation in networked services. The programming
languages community can play a vital role in this
transformation, by creating languages, compilers,
run-time systems, and testing and verification
techniques that raise the level of abstraction for
programming the network. In this talk, we give an
overview of Software Defined Networking, and survey the
early programming-languages research in this area. We
also outline exciting opportunities for
interdisciplinary research at the intersection of
programming languages and computer networks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Monsanto:2012:CRT,
author = "Christopher Monsanto and Nate Foster and Rob Harrison
and David Walker",
title = "A compiler and run-time system for network programming
languages",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "217--230",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103685",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software-defined networks (SDNs) are a new kind of
network architecture in which a controller machine
manages a distributed collection of switches by
instructing them to install or uninstall
packet-forwarding rules and report traffic statistics.
The recently formed Open Networking Consortium, whose
members include Google, Facebook, Microsoft, Verizon,
and others, hopes to use this architecture to transform
the way that enterprise and data center networks are
implemented. In this paper, we define a high-level,
declarative language, called NetCore, for expressing
packet-forwarding policies on SDNs. NetCore is
expressive, compositional, and has a formal semantics.
To ensure that a majority of packets are processed
efficiently on switches---instead of on the
controller---we present new compilation algorithms for
NetCore and couple them with a new run-time system that
issues rule installation commands and
traffic-statistics queries to switches. Together, the
compiler and run-time system generate efficient rules
whenever possible and outperform the simple, manual
techniques commonly used to program SDNs today. In
addition, the algorithms we develop are generic,
assuming only that the packet-matching capabilities
available on switches satisfy some basic algebraic
laws. Overall, this paper delivers a new design for a
high-level network programming language; an improved
set of compiler algorithms; a new run-time system for
SDN architectures; the first formal semantics and
proofs of correctness in this domain; and an
implementation and evaluation that demonstrates the
performance benefits over traditional manual
techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
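
In the spirit of the declarative policies described above (constructor
names are illustrative, not NetCore's concrete syntax): a policy
denotes, per packet, the set of ports to forward out of, and the
compiler's job is to realise the same function as prioritised switch
rules.

data Packet = Packet { srcIp :: Int, dstPort :: Int }

data Pred   = AnyPacket | DstPort Int | SrcIp Int
            | And Pred Pred | Not Pred

data Policy = Fwd Pred Int         -- forward matching packets to a port
            | Par Policy Policy    -- union of two policies

matches :: Pred -> Packet -> Bool
matches AnyPacket   _   = True
matches (DstPort p) pkt = dstPort pkt == p
matches (SrcIp s)   pkt = srcIp pkt == s
matches (And a b)   pkt = matches a pkt && matches b pkt
matches (Not a)     pkt = not (matches a pkt)

eval :: Policy -> Packet -> [Int]  -- the policy's formal semantics
eval (Fwd p port) pkt = [ port | matches p pkt ]
eval (Par p1 p2)  pkt = eval p1 pkt ++ eval p2 pkt
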
@Article{Chugh:2012:NRL,
author = "Ravi Chugh and Patrick M. Rondon and Ranjit Jhala",
title = "Nested refinements: a logic for duck typing",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "231--244",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103686",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programs written in dynamic languages make heavy use
of features --- run-time type tests, value-indexed
dictionaries, polymorphism, and higher-order functions
--- that are beyond the reach of type systems that
employ either purely syntactic or purely semantic
reasoning. We present a core calculus, System D, that
merges these two modes of reasoning into a single
powerful mechanism of nested refinement types wherein
the typing relation is itself a predicate in the
refinement logic. System D coordinates SMT-based
logical implication and syntactic subtyping to
automatically typecheck sophisticated dynamic language
programs. By coupling nested refinements with
McCarthy's theory of finite maps, System D can
precisely reason about the interaction of higher-order
functions, polymorphism, and dictionaries. The addition
of type predicates to the refinement logic creates a
circularity that leads to unique technical challenges
in the metatheory, which we solve with a novel
stratification approach that we use to prove the
soundness of System D.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Cousot:2012:AIFa,
author = "Patrick Cousot and Radhia Cousot",
title = "An abstract interpretation framework for termination",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "245--258",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103687",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Proof, verification and analysis methods for
termination all rely on two induction principles: (1) a
variant function or induction on data ensuring progress
towards the end and (2) some form of induction on the
program structure. The abstract interpretation design
principle is first illustrated for the design of new
forward and backward proof, verification and analysis
methods for safety. The safety collecting semantics
defining the strongest safety property of programs is
first expressed in a constructive fixpoint form. Safety
proof and checking/verification methods then
immediately follow by fixpoint induction. Static
analysis of abstract safety properties such as
invariance are constructively designed by fixpoint
abstraction (or approximation) to (automatically) infer
safety properties. So far, no such clear design
principle existed for termination, so the
existing approaches are scattered and largely not
comparable with each other. For (1), we show that this
design principle applies equally well to potential and
definite termination. The trace-based termination
collecting semantics is given a fixpoint definition.
Its abstraction yields a fixpoint definition of the
best variant function. By further abstraction of this
best variant function, we derive the Floyd/Turing
termination proof method as well as new static analysis
methods to effectively compute approximations of this
best variant function. For (2), we introduce a
generalization of the syntactic notion of structural
induction (as found in Hoare logic) into a semantic
structural induction based on the new semantic concept
of inductive trace cover covering execution traces by
segments, a new basis for formulating program
properties. Its abstractions allow for generalized
recursive proof, verification and static analysis
methods by induction on program structure,
control, and data. Examples of particular instances
include Floyd's handling of loop cutpoints as well as
nested loops, Burstall's intermittent assertion total
correctness proof method, and Podelski-Rybalchenko
transition invariants.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Hoder:2012:PGA,
author = "Kry{\v{s}}tof Hoder and Laura Kov{\'a}cs and Andrei Voronkov",
title = "Playing in the grey area of proofs",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "259--272",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103689",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Interpolation is an important technique in
verification and static analysis of programs. In
particular, interpolants extracted from proofs of
various properties are used in invariant generation and
bounded model checking. A number of recent papers
study interpolation in various theories and also the
extraction of smaller interpolants from proofs. In
particular, there are several algorithms for extracting
interpolants from so-called local proofs. The main
contribution of this paper is a technique of minimising
interpolants based on transformations of what we call
the `grey area' of local proofs. Another contribution
is a technique of transforming, under certain common
conditions, arbitrary proofs into local ones. Unlike
many other interpolation techniques, our technique is
very general and applies to arbitrary theories. Our
approach is implemented in the theorem prover Vampire
and evaluated on a large number of benchmarks coming
from first-order theorem proving and bounded model
checking using logic with equality, uninterpreted
functions and linear integer arithmetic. Our
experiments demonstrate the power of the new
techniques: for example, it is not unusual that our
proof transformation gives more than a tenfold
reduction in the size of interpolants.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Stampoulis:2012:SUE,
author = "Antonis Stampoulis and Zhong Shao",
title = "Static and user-extensible proof checking",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "273--284",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103690",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite recent successes, large-scale proof
development within proof assistants remains an arcane
art that is extremely time-consuming. We argue that
this can be attributed to two profound shortcomings in
the architecture of modern proof assistants. The first
is that proofs need to include a large amount of minute
detail; this is due to the rigidity of the proof
checking process, which cannot be extended with
domain-specific knowledge. In order to avoid these
details, we rely on developing and using tactics,
specialized procedures that produce proofs.
Unfortunately, tactics are both hard to write and hard
to use, revealing the second shortcoming of modern
proof assistants. This is because there is no static
knowledge about their expected use and behavior. As has
recently been demonstrated, languages that allow
type-safe manipulation of proofs, like Beluga, Delphin
and VeriML, can be used to partly mitigate this second
issue, by assigning rich types to tactics. Still, the
architectural issues remain. In this paper, we build on
this existing work, and demonstrate two novel ideas: an
extensible conversion rule and support for static proof
scripts. Together, these ideas enable us to support
both user-extensible proof checking, and sophisticated
static checking of tactics, leading to a new point in
the design space of future proof assistants. Both ideas
are based on the interplay between a light-weight
staging construct and the rich type information
available.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Klein:2012:RYR,
author = "Casey Klein and John Clements and Christos Dimoulas
and Carl Eastlund and Matthias Felleisen and Matthew
Flatt and Jay A. McCarthy and Jon Rafkind and Sam
Tobin-Hochstadt and Robert Bruce Findler",
title = "Run your research: on the effectiveness of lightweight
mechanization",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "285--296",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103691",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Formal models serve in many roles in the programming
language community. In its primary role, a model
communicates the idea of a language design; the
architecture of a language tool; or the essence of a
program analysis. No matter which role it plays,
however, a faulty model doesn't serve its purpose. One
way to eliminate flaws from a model is to write it down
in a mechanized formal language. It is then possible to
state theorems about the model, to prove them, and to
check the proofs. Over the past nine years, PLT has
developed and explored a lightweight version of this
approach, dubbed Redex. In a nutshell, Redex is a
domain-specific language for semantic models that is
embedded in the Racket programming language. The effort
of creating a model in Redex is often no more
burdensome than typesetting it with LaTeX; the
difference is that Redex comes with tools for the
semantics engineering life cycle.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Farzan:2012:VPC,
author = "Azadeh Farzan and Zachary Kincaid",
title = "Verification of parameterized concurrent programs by
modular reasoning about data and control",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "297--308",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103693",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we consider the problem of verifying
thread-state properties of multithreaded programs in
which the number of active threads cannot be statically
bounded. Our approach is based on decomposing the task
into two modules, where one reasons about data and the
other reasons about control. The data module computes
thread-state invariants (e.g., linear constraints over
global variables and local variables of one thread)
using the thread interference information computed by
the control module. The control module computes a
representation of thread interference, as an
incrementally constructed data flow graph, using the
data invariants provided by the data module. These
invariants are used to rule out patterns of thread
interference that cannot occur in a real program
execution. The two modules are incorporated into a
feedback loop, so that the abstractions of data and
interference are iteratively coarsened as the algorithm
progresses (that is, they become weaker) until a fixed
point is reached. Our approach is sound and
terminating, and applicable to programs with infinite
state (e.g., unbounded integers) and unboundedly many
threads. The verification method presented in this
paper has been implemented in a tool called Duet. We
demonstrate the effectiveness of our technique by
verifying properties of a selection of Linux device
drivers using Duet, and also compare Duet with previous
work on verification of parameterized Boolean programs
using the Boolean abstractions of these drivers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Botincan:2012:RSS,
author = "Matko Botin{\v{c}}an and Mike Dodds and Suresh Jagannathan",
title = "Resource-sensitive synchronization inference by
abduction",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "309--322",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103694",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an analysis which takes as its input a
sequential program, augmented with annotations
indicating potential parallelization opportunities, and
a sequential proof, written in separation logic, and
produces a correctly-synchronized parallelized program
and proof of that program. Unlike previous work, ours
is not an independence analysis; we insert
synchronization constructs to preserve relevant
dependencies found in the sequential program that may
otherwise be violated by a naive translation.
Separation logic allows us to parallelize fine-grained
patterns of resource-usage, moving beyond
straightforward points-to analysis. Our analysis works
by using the sequential proof to discover dependencies
between different parts of the program. It leverages
these discovered dependencies to guide the insertion of
synchronization primitives into the parallelized
program, and to ensure that the resulting parallelized
program satisfies the same specification as the
original sequential program, and exhibits the same
sequential behaviour. Our analysis is built using frame
inference and abduction, two techniques supported by an
increasing number of separation logic tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Reddy:2012:SCI,
author = "Uday S. Reddy and John C. Reynolds",
title = "Syntactic control of interference for separation
logic",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "323--336",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103695",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Separation Logic has witnessed tremendous success in
recent years in reasoning about programs that deal with
heap storage. Its success owes to the fundamental
principle that one should keep separate areas of the
heap storage separate in program reasoning. However,
the way Separation Logic deals with program variables
continues to be based on traditional Hoare Logic
without taking any benefit of the separation principle.
This has led to unwieldy proof rules suffering from
lack of clarity as well as questions surrounding their
soundness. In this paper, we extend the separation idea
to the treatment of variables in Separation Logic,
especially Concurrent Separation Logic, using the
system of Syntactic Control of Interference proposed by
Reynolds in 1978. We extend the original system with
permission algebras, making it more powerful and able
to deal with the issues of concurrent programs. The
result is a streamlined presentation of Concurrent
Separation Logic, whose rules are memorable and
soundness obvious. We also include a discussion of how
the new rules impact the semantics and devise static
analysis techniques to infer the required permissions
automatically.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Licata:2012:CDT,
author = "Daniel R. Licata and Robert Harper",
title = "Canonicity for $2$-dimensional type theory",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "337--348",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103697",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Higher-dimensional dependent type theory enriches
conventional one-dimensional dependent type theory with
additional structure expressing equivalence of elements
of a type. This structure may be employed in a variety
of ways to capture rather coarse identifications of
elements, such as a universe of sets considered modulo
isomorphism. Equivalence must be respected by all
families of types and terms, as witnessed
computationally by a type-generic program.
Higher-dimensional type theory has applications to code
reuse for dependently typed programming, and to the
formalization of mathematics. In this paper, we develop
a novel judgemental formulation of a two-dimensional
type theory, which enjoys a canonicity property: a
closed term of boolean type is definitionally equal to
true or false. Canonicity is a necessary condition for
a computational interpretation of type theory as a
programming language, and does not hold for existing
axiomatic presentations of higher-dimensional type
theory. The method of proof is a generalization of the
NuPRL semantics, interpreting types as syntactic
groupoids rather than equivalence relations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Kammar:2012:AFE,
author = "Ohad Kammar and Gordon D. Plotkin",
title = "Algebraic foundations for effect-dependent
optimisations",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "349--360",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103698",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a general theory of Gifford-style type and
effect annotations, where effect annotations are sets
of effects. Generality is achieved by recourse to the
theory of algebraic effects, a development of Moggi's
monadic theory of computational effects that emphasises
the operations causing the effects at hand and their
equational theory. The key observation is that
annotation effects can be identified with operation
symbols. We develop an annotated version of Levy's
Call-by-Push-Value language with a kind of computations
for every effect set; it can be thought of as a
sequential, annotated intermediate language. We develop
a range of validated optimisations (i.e.,
equivalences), generalising many existing ones and
adding new ones. We classify these optimisations as
structural, algebraic, or abstract: structural
optimisations always hold; algebraic ones depend on the
effect theory at hand; and abstract ones depend on the
global nature of that theory (we give
modularly-checkable sufficient conditions for their
validity).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Cretin:2012:PCA,
author = "Julien Cretin and Didier R{\'e}my",
title = "On the power of coercion abstraction",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "361--372",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103699",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Erasable coercions in System F-eta, also known as
retyping functions, are well-typed eta-expansions of
the identity. They may change the type of terms without
changing their behavior and can thus be erased before
reduction. Coercions in F-eta can model subtyping of
known types and some displacement of quantifiers, but
not subtyping assumptions nor certain forms of delayed
type instantiation. We generalize F-eta by allowing
abstraction over retyping functions. We follow a
general approach where computing with coercions can be
seen as computing in the lambda-calculus but keeping
track of which parts of terms are coercions. We obtain
a language where coercions do not contribute to the
reduction but may block it and are thus not erasable.
We recover erasable coercions by choosing a weak
reduction strategy and restricting coercion abstraction
to value-forms or by restricting abstraction to
coercions that are polymorphic in their domain or
codomain. The latter variant subsumes F-eta, F-sub, and
MLF in a unified framework.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Naik:2012:AT,
author = "Mayur Naik and Hongseok Yang and Ghila Castelnuovo and
Mooly Sagiv",
title = "Abstractions from tests",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "373--386",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103701",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a framework for leveraging dynamic analysis
to find good abstractions for static analysis. A static
analysis in our framework is parametrised. Our main
insight is to directly and efficiently compute, from a
concrete trace, a necessary condition on the parameter
configurations to prove a given query, and thereby
prune the space of parameter configurations that the
static analysis must consider. We provide constructive
algorithms for two instance analyses in our framework:
a flow- and context-sensitive thread-escape analysis
and a flow- and context-insensitive points-to analysis.
We show the efficacy of these analyses, and our
approach, on six Java programs comprising two million
bytecodes: the thread-escape analysis resolves 80\% of
queries on average, disproving 28\% and proving 52\%;
the points-to analysis resolves 99\% of queries on
average, disproving 29\% and proving 70\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Smaragdakis:2012:SPR,
author = "Yannis Smaragdakis and Jacob Evans and Caitlin
Sadowski and Jaeheon Yi and Cormac Flanagan",
title = "Sound predictive race detection in polynomial time",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "387--400",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103702",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data races are among the most reliable indicators of
programming errors in concurrent software. For at least
two decades, Lamport's happens-before (HB) relation has
served as the standard test for detecting races---other
techniques, such as lockset-based approaches, fail to
be sound, as they may falsely warn of races. This work
introduces a new relation, causally-precedes (CP),
which generalizes happens-before to observe more races
without sacrificing soundness. Intuitively, CP tries to
capture the concept of happens-before ordered events
that must occur in the observed order for the program
to observe the same values. What distinguishes CP from
past predictive race detection approaches (which also
generalize an observed execution to detect races in
other plausible executions) is that CP-based race
detection is both sound and of polynomial complexity.
We demonstrate that the unique aspects of CP result in
practical benefit. Applying CP to real-world programs,
we successfully analyze server-level applications
(e.g., Apache FtpServer) and show that traces longer
than in past predictive race analyses can be analyzed
in mere seconds to a few minutes. For these programs,
CP race detection uncovers races that are hard to
detect by repeated execution and HB race detection: a
single run of CP race detection produces several races
not discovered by 10 separate rounds of happens-before
race detection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Bojanczyk:2012:TNC,
author = "Miko{\l}aj Boja{\'n}czyk and Laurent Braud and Bartek
Klin and S{\l}awomir Lasota",
title = "Towards nominal computation",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "401--412",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103704",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Nominal sets are a different kind of set theory, with
a more relaxed notion of finiteness. They offer an
elegant formalism for describing lambda-terms modulo
alpha-conversion, or automata on data words. This paper
is an attempt at defining computation in nominal sets.
We present a rudimentary programming language, called
Nlambda. The key idea is that it includes a native type
for finite sets in the nominal sense. To illustrate the
power of our language, we write short programs that
process automata on data words.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Cave:2012:PBI,
author = "Andrew Cave and Brigitte Pientka",
title = "Programming with binders and indexed data-types",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "413--424",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103705",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We show how to combine a general purpose type system
for an existing language with support for programming
with binders and contexts by refining the type system
of ML with a restricted form of dependent types where
index objects are drawn from contextual LF. This allows
the user to specify formal systems within the logical
framework LF and index ML types with contextual LF
objects. Our language design keeps the index language
generic, requiring only decidability of equality of the
index language, which provides a modular design. To
illustrate the elegance and effectiveness of our
language, we give programs for closure conversion and
normalization by evaluation. Our three key technical
contributions are: (1) We give a bi-directional type
system for our core language which is centered around
refinement substitutions instead of constraint solving.
As a consequence, type checking is decidable and easy
to trust, although constraint solving may be
undecidable. (2) We give a big-step environment-based
operational semantics, which lends itself to
efficient implementation. (3) We prove our
language to be type safe and have mechanized our
theoretical development in the proof assistant Coq
using the fresh approach to binding.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Moore:2012:MLF,
author = "J. Strother Moore",
title = "Meta-level features in an industrial-strength theorem
prover",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "425--426",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103707",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The ACL2 theorem prover---the current incarnation of
`the' Boyer--Moore theorem prover---is a theorem prover
for an extension of a first-order, applicative subset
of Common Lisp. The ACL2 system provides a useful
specification and modeling language as well as a useful
mechanical theorem proving environment. ACL2 is in use
at several major microprocessor manufacturers to verify
functional correctness of important components of
commercial designs. This talk explores the design of
ACL2 and the tradeoffs that have turned out to be
pivotal to its success.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Zhao:2012:FLI,
author = "Jianzhou Zhao and Santosh Nagarakatte and Milo M. K.
Martin and Steve Zdancewic",
title = "Formalizing the {LLVM} intermediate representation for
verified program transformations",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "427--440",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103709",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents Vellvm (verified LLVM), a
framework for reasoning about programs expressed in
LLVM's intermediate representation and transformations
that operate on it. Vellvm provides a mechanized formal
semantics of LLVM's intermediate representation, its
type system, and properties of its SSA form. The
framework is built using the Coq interactive theorem
prover. It includes multiple operational semantics and
proves relations among them to facilitate different
reasoning styles and proof techniques. To validate
Vellvm's design, we extract an interpreter from the Coq
formal semantics that can execute programs from the LLVM
test suite and thus be compared against LLVM reference
implementations. To demonstrate Vellvm's practicality,
we formalize and verify a previously proposed
transformation that hardens C programs against spatial
memory safety violations. Vellvm's tools allow us to
extract a new, verified implementation of the
transformation pass that plugs into the real LLVM
infrastructure; its performance is competitive with the
non-verified, ad-hoc original.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Zhu:2012:RAA,
author = "Zeyuan Allen Zhu and Sa{\v{s}}a Misailovi{\'c} and Jonathan A.
Kelner and Martin Rinard",
title = "Randomized accuracy-aware program transformations for
efficient approximate computations",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "441--454",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103710",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite the fact that approximate computations have
come to dominate many areas of computer science, the
field of program transformations has focused almost
exclusively on traditional semantics-preserving
transformations that do not attempt to exploit the
opportunity, available in many computations, to
acceptably trade off accuracy for benefits such as
increased performance and reduced resource consumption.
We present a model of computation for approximate
computations and an algorithm for optimizing these
computations. The algorithm works with two classes of
transformations: substitution transformations (which
select one of a number of available implementations for
a given function, with each implementation offering a
different combination of accuracy and resource
consumption) and sampling transformations (which
randomly discard some of the inputs to a given
reduction). The algorithm produces a $ (1 + \epsilon) $
randomized approximation to the optimal randomized
computation (which minimizes resource consumption
subject to a probabilistic accuracy specification in
the form of a maximum expected error or maximum error
variance).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Liang:2012:RGB,
author = "Hongjin Liang and Xinyu Feng and Ming Fu",
title = "A rely-guarantee-based simulation for verifying
concurrent program transformations",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "455--468",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103711",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Verifying program transformations usually requires
proving that the resulting program (the target) refines
or is equivalent to the original one (the source).
However, the refinement relation between individual
sequential threads cannot be preserved in general in
the presence of parallel compositions, due to
instruction reordering and the different granularities
of atomic operations at the source and the target. On
the other hand, the refinement relation defined based
on fully abstract semantics of concurrent programs
assumes arbitrary parallel environments, which is too
strong and cannot be satisfied by many well-known
transformations. In this paper, we propose a
Rely-Guarantee-based Simulation (RGSim) to verify
concurrent program transformations. The relation is
parametrized with constraints of the environments that
the source and the target programs may compose with. It
considers the interference between threads and their
environments, and is thus less permissive than relations
over sequential programs. It is compositional w.r.t.
parallel compositions as long as the constraints are
satisfied. Also, RGSim does not require semantics
preservation under all environments, and can
incorporate the assumptions about environments made by
specific program transformations in the form of
rely/guarantee conditions. We use RGSim to reason about
optimizations and prove atomicity of concurrent
objects. We also propose a general garbage collector
verification framework based on RGSim, and verify the
Boehm et al. concurrent mark-sweep GC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Balabonski:2012:UAF,
author = "Thibaut Balabonski",
title = "A unified approach to fully lazy sharing",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "469--480",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103713",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We give an axiomatic presentation of
sharing-via-labelling for weak lambda-calculi, that
makes it possible to formally compare many different
approaches to fully lazy sharing, and obtain two
important results. We prove that the known
implementations of full laziness are all equivalent in
terms of the number of beta-reductions performed,
although they behave differently regarding the
duplication of terms. We establish a link between the
optimality theories of weak lambda-calculi and
first-order rewriting systems by expressing fully lazy
lambda-lifting in our framework, thus emphasizing the
first-order essence of weak reduction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Rastogi:2012:IOG,
author = "Aseem Rastogi and Avik Chaudhuri and Basil Hosmer",
title = "The ins and outs of gradual type inference",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "481--494",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103714",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Gradual typing lets programmers evolve their
dynamically typed programs by gradually adding explicit
type annotations, which confer benefits like improved
performance and fewer run-time failures. However, we
argue that such evolution often requires a giant leap,
and that type inference can offer a crucial missing
step. If omitted type annotations are interpreted as
unknown types, rather than the dynamic type, then
static types can often be inferred, thereby removing
unnecessary assumptions of the dynamic type. The
remaining assumptions of the dynamic type may then be
removed by either reasoning outside the static type
system, or restructuring the code. We present a type
inference algorithm that can improve the performance of
existing gradually typed programs without introducing
any new run-time failures. To account for dynamic
typing, types that flow in to an unknown type are
treated in a fundamentally different manner than types
that flow out. Furthermore, in the interests of
backward-compatibility, an escape analysis is conducted
to decide which types are safe to infer. We have
implemented our algorithm for ActionScript, and
evaluated it on the SunSpider and V8 benchmark suites.
We demonstrate that our algorithm can improve the
performance of unannotated programs as well as recover
most of the type annotations in annotated programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Hofmann:2012:EL,
author = "Martin Hofmann and Benjamin Pierce and Daniel Wagner",
title = "Edit lenses",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "495--508",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103715",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A lens is a bidirectional transformation between a
pair of connected data structures, capable of
translating an edit on one structure into an
appropriate edit on the other. Many varieties of lenses
have been studied, but none, to date, has offered a
satisfactory treatment of how edits are represented.
Many foundational accounts only consider edits of the
form `overwrite the whole structure,' leading to poor
behavior in many situations by failing to track the
associations between corresponding parts of the
structures when elements are inserted and deleted in
ordered lists, for example. Other theories of lenses do
maintain these associations, either by annotating the
structures themselves with change information or using
auxiliary data structures, but every extant theory
assumes that the entire original source structure is
part of the information passed to the lens. We offer a
general theory of edit lenses, which work with
descriptions of changes to structures, rather than with
the structures themselves. We identify a simple notion
of `editable structure'---a set of states plus a monoid
of edits with a partial monoid action on the
states---and construct a semantic space of lenses
between such structures, with natural laws governing
their behavior. We show how a range of constructions
from earlier papers on `state-based' lenses can be
carried out in this space, including composition,
products, sums, list operations, etc. Further, we show
how to construct edit lenses for arbitrary containers
in the sense of Abbott, Altenkirch, and Ghani. Finally,
we show that edit lenses refine a well-known
formulation of state-based lenses, in the sense that
every state-based lens gives rise to an edit lens over
structures with a simple overwrite-only edit language,
and conversely every edit lens on such structures gives
rise to a state-based lens.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Batty:2012:CCC,
author = "Mark Batty and Kayvan Memarian and Scott Owens and
Susmit Sarkar and Peter Sewell",
title = "Clarifying and compiling {C\slash C++} concurrency:
from {C++11} to {POWER}",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "509--520",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103717",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The upcoming C and C++ revised standards add
concurrency to the languages, for the first time, in
the form of a subtle *relaxed memory model* (the *C++11
model*). This aims to permit compiler optimisation and
to accommodate the differing relaxed-memory behaviours
of mainstream multiprocessors, combining simple
semantics for most code with high-performance
*low-level atomics* for concurrency libraries. In this
paper, we first establish two simpler but provably
equivalent models for C++11, one for the full language
and another for the subset without consume operations.
Subsetting further to the fragment without low-level
atomics, we identify a subtlety arising from atomic
initialisation and prove that, under an additional
condition, the model is equivalent to sequential
consistency for race-free programs. We then prove our
main result, the correctness of two proposed
compilation schemes for the C++11 load and store
concurrency primitives to Power assembly, having noted
that an earlier proposal was flawed. (The main ideas
apply also to ARM, which has a similar relaxed memory
architecture.) This should inform the ongoing
development of production compilers for C++11 and C1x,
clarifies what properties of the machine architecture
are required, and builds confidence in the C++11 and
Power semantics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Ramananandro:2012:MSC,
author = "Tahina Ramananandro and Gabriel {Dos Reis} and Xavier
Leroy",
title = "A mechanized semantics for {C++} object construction
and destruction, with applications to resource
management",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "521--532",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103718",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a formal operational semantics and its Coq
mechanization for the C++ object model, featuring
object construction and destruction, shared and
repeated multiple inheritance, and virtual function
call dispatch. These are key C++ language features for
high-level system programming, in particular for
predictable and reliable resource management. This
paper is the first to present a formal mechanized
account of the metatheory of construction and
destruction in C++, and applications to popular
programming techniques such as `resource acquisition is
initialization'. We also report on irregularities and
apparent contradictions in the ISO C++03 and C++11
standards.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Ellison:2012:EFS,
author = "Chucky Ellison and Grigore Ro{\c{s}}u",
title = "An executable formal semantics of {C} with
applications",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "533--544",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103719",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes an executable formal semantics of
C. Being executable, the semantics has been thoroughly
tested against the GCC torture test suite and
successfully passes 99.2\% of 776 test programs. It is
the most complete and thoroughly tested formal
definition of C to date. The semantics yields an
interpreter, debugger, state space search tool, and
model checker `for free'. The semantics is shown
capable of automatically finding program errors, both
statically and at runtime. It is also used to enumerate
nondeterministic behavior.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Bhat:2012:TTP,
author = "Sooraj Bhat and Ashish Agarwal and Richard Vuduc and
Alexander Gray",
title = "A type theory for probability density functions",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "545--556",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103721",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There has been great interest in creating
probabilistic programming languages to simplify the
coding of statistical tasks; however, there still does
not exist a formal language that simultaneously
provides (1) continuous probability distributions, (2)
the ability to naturally express custom probabilistic
models, and (3) probability density functions (PDFs).
This collection of features is necessary for
mechanizing fundamental statistical techniques. We
formalize the first probabilistic language that
exhibits these features, and it serves as a
foundational framework for extending the ideas to more
general languages. Particularly novel are our type
system for absolutely continuous (AC) distributions
(those which permit PDFs) and our PDF calculation
procedure, which calculates PDFs for a large class of
AC distributions. Our formalization paves the way
toward the rigorous encoding of powerful statistical
reformulations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Naden:2012:TSB,
author = "Karl Naden and Robert Bocchino and Jonathan Aldrich
and Kevin Bierhoff",
title = "A type system for borrowing permissions",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "557--570",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103722",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In object-oriented programming, unique permissions to
object references are useful for checking correctness
properties such as consistency of typestate and
noninterference of concurrency. To be usable, unique
permissions must be borrowed --- for example, one must
be able to read a unique reference out of a field, use
it for something, and put it back. While one can null
out the field and later reassign it, this paradigm is
ungainly and requires unnecessary writes, potentially
hurting cache performance. Therefore, in practice
borrowing must occur in the type system, without
requiring memory updates. Previous systems support
borrowing with external alias analysis and/or explicit
programmer management of fractional permissions. While
these approaches are powerful, they are also awkward
and difficult for programmers to understand. We present
an integrated language and type system with unique,
immutable, and shared permissions, together with new
local permissions that say that a reference may not be
stored to the heap. Our system also includes change
permissions such as unique {\tt >>} unique and unique
{\tt >>} none that describe how permissions flow in and
out of method formal parameters. Together, these
features support common patterns of borrowing,
including borrowing multiple local permissions from a
unique reference and recovering the unique reference
when the local permissions go out of scope, without any
explicit management of fractions in the source
language. All accounting of fractional permissions is
done by the type system `under the hood.' We present
the syntax and static and dynamic semantics of a formal
core language and state soundness results. We also
illustrate the utility and practicality of our design
by using it to express several realistic examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Strub:2012:SCB,
author = "Pierre-Yves Strub and Nikhil Swamy and C{\'e}dric Fournet
and Juan Chen",
title = "Self-certification: bootstrapping certified
typecheckers in {F*} with {Coq}",
journal = j-SIGPLAN,
volume = "47",
number = "1",
pages = "571--584",
month = jan,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2103621.2103723",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Mar 15 18:16:55 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Well-established dependently-typed languages like Agda
and Coq provide reliable ways to build and check formal
proofs. Several other dependently-typed languages such
as Aura, ATS, Cayenne, Epigram, F*, F7, Fine, Guru,
PCML5, and Ur also explore reliable ways to develop and
verify programs. All these languages shine in their own
regard, but their implementations do not themselves
enjoy the degree of safety provided by machine-checked
verification. We propose a general technique called
self-certification that allows a typechecker for a
suitably expressive language to be certified for
correctness. We have implemented this technique for F*,
a dependently typed language on the {.NET} platform.
Self-certification involves implementing a typechecker
for F* in F*, while using all the conveniences F*
provides for the compiler-writer (e.g., partiality,
effects, implicit conversions, proof automation,
libraries). This typechecker is given a specification
(in F*) strong enough to ensure that it computes valid
typing derivations. We obtain a typing derivation for
the core typechecker by running it on itself, and we
export it to Coq as a type-derivation certificate. By
typechecking this derivation (in Coq) and applying the
F* metatheory (also mechanized in Coq), we conclude
that our type checker is correct. Once certified in
this manner, the F* typechecker is emancipated from
Coq.\par
Self-certification leads to an efficient certification
scheme --- we no longer depend on verifying
certificates in Coq --- as well as a more broadly
applicable one. For instance, the self-certified F*
checker is suitable for use in adversarial settings
where Coq is not intended for use, such as run-time
certification of mobile code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '12 conference proceedings.",
}
@Article{Serpette:2012:ISS,
author = "Bernard Paul Serpette and Manuel Serrano",
title = "An interpreter for server-side {HOP}",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "1--12",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047851",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "HOP is a Scheme-based multi-tier programming language
for the Web. The client-side of a program is compiled
to JavaScript, while the server-side is executed by a
mix of natively compiled code and interpreted code. At
the time where HOP programs were basic scripts, the
performance of the server-side interpreter was not a
concern; an inefficient interpreter was acceptable. As
HOP expanded, HOP programs got larger and more complex.
A more efficient interpreter was necessary. This new
interpreter is described in this paper. It is compact:
its whole implementation counts no more than 2.5
KLOC. It is more than twice as fast as the old
interpreter and consumes less than a third of its
memory. Although it cannot compete with static or JIT
native compilers, our experimental results show that it
is amongst the fastest interpreters for dynamic
languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
@Article{Chang:2012:IOT,
author = "Mason Chang and Bernd Mathiske and Edwin Smith and
Avik Chaudhuri and Andreas Gal and Michael Bebenita and
Christian Wimmer and Michael Franz",
title = "The impact of optional type information on {JIT}
compilation of dynamically typed languages",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "13--24",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047853",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Optionally typed languages enable direct performance
comparisons between untyped and type annotated source
code. We present a comprehensive performance evaluation
of two different JIT compilers in the context of
ActionScript, a production-quality optionally typed
language. One JIT compiler is optimized for quick
compilation rather than JIT compiled code performance.
The second JIT compiler is a more aggressively
optimizing compiler, performing both high-level and
low-level optimizations. We evaluate both JIT compilers
directly on the same benchmark suite, measuring their
performance changes across fully typed, partially
typed, and untyped code. Such evaluations are
especially relevant to dynamically typed languages such
as JavaScript, which are currently evaluating the idea
of adding optional type annotations. We demonstrate
that low-level optimizations rarely accelerate the
program enough to pay back the investment in
performing them in an optionally typed language. Our
experiments and data demonstrate that high-level
optimizations are required to improve performance by
any significant amount.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
@Article{Homescu:2012:HTJ,
author = "Andrei Homescu and Alex Suhan",
title = "{HappyJIT}: a tracing {JIT} compiler for {PHP}",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "25--36",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047854",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Current websites are a combination of server-generated
dynamic content with client-side interactive programs.
Dynamically typed languages have gained a lot of
ground in both of these domains. The growth of Web 2.0
has introduced a myriad of websites which contain
personalized content, which is specific to the user.
PHP or Python programs generate the actual HTML page
after querying a database and processing the results,
which are then presented by the browser. It is becoming
more and more vital to accelerate the execution of
these programs, as this is a significant part of the
total time needed to present the page to the user. This
paper presents a novel interpreter for the PHP language
written in RPython, which the PyPy translator then
translates into C. The translator integrates into the
interpreter a tracing just-in-time compiler which
optimizes the hottest loops in the interpreted
programs. We also describe a data model that supports
all the data types in the PHP language, such as
references and iterators. We evaluate the performance
of this interpreter, showing that speedups up to a
factor of 8 are observed using this approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
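The hot-loop machinery that drives a tracing JIT of this kind can be
sketched compactly. The toy interpreter below is our illustration, not
HappyJIT's code; the opcode names and the threshold are invented. It
counts executions of each loop header and, once a loop is hot, replaces
interpretation of that loop with an optimized trace:

  HOT_THRESHOLD = 100    # invented value; real JITs tune this carefully

  def record_trace(bytecode, header_pc):
      # Stand-in for trace recording and optimization: fold the loop
      # body's ADDs into one step and run the loop to completion,
      # returning control to the interpreter at the loop exit.
      step, pc = 0, header_pc + 1
      while bytecode[pc][0] == "ADD":
          step += bytecode[pc][1]
          pc += 1
      op, (_target, bound) = bytecode[pc]
      assert op == "JUMP_IF_LT"
      exit_pc = pc + 1
      def trace(acc):
          while acc < bound:      # the loop guard; real traces bail out
              acc += step         # to the interpreter on guard failure
          return acc, exit_pc
      return trace

  def interpret(bytecode):
      counters, traces = {}, {}
      pc = acc = 0
      while pc < len(bytecode):
          op, arg = bytecode[pc]
          if op == "LOOP_HEADER":
              if pc in traces:
                  acc, pc = traces[pc](acc)    # run the optimized trace
                  continue
              counters[pc] = counters.get(pc, 0) + 1
              if counters[pc] >= HOT_THRESHOLD:
                  traces[pc] = record_trace(bytecode, pc)
              pc += 1
          elif op == "ADD":
              acc, pc = acc + arg, pc + 1
          elif op == "JUMP_IF_LT":
              target, bound = arg
              pc = target if acc < bound else pc + 1
          else:
              raise ValueError(op)
      return acc

  print(interpret([("LOOP_HEADER", None), ("ADD", 2),
                   ("JUMP_IF_LT", (0, 10_000))]))    # -> 10000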
@Article{Zhao:2012:PTI,
author = "Tian Zhao",
title = "Polymorphic type inference for scripting languages
with object extensions",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "37--50",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047855",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a polymorphic type inference
algorithm for a small subset of JavaScript. The goal is
to prevent accessing undefined members of objects. We
define a type system that allows explicit extension of
objects through an add operation and implicit extension
through method calls. The type system also permits
strong updates and unrestricted extensions to new
objects. The type inference algorithm is modular so
that each function definition is only analyzed once and
larger programs can be checked incrementally.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
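The property the type system enforces can be pictured with a
deliberately tiny flow checker (our own toy in Python, not Zhao's
inference algorithm): an object's type is the set of members known to
be present, an add operation extends that set (a strong update), and
every access must find its member in the set:

  class MemberError(Exception):
      pass

  def check(program):
      # program: list of ("new", x)      -- create a fresh, empty object
      #               | ("add", x, m)    -- extend x with member m
      #               | ("get", x, m)    -- access member m of x
      members = {}          # variable -> set of members known present
      for instr in program:
          if instr[0] == "new":
              members[instr[1]] = set()
          elif instr[0] == "add":
              members[instr[1]].add(instr[2])
          elif instr[0] == "get":
              _, x, m = instr
              if m not in members[x]:
                  raise MemberError(f"{x}.{m} may be undefined")

  check([("new", "x"), ("add", "x", "f"), ("get", "x", "f")])  # accepted
  # check([("new", "y"), ("get", "y", "g")])  # rejected: y.g undefined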
@Article{Hirschfeld:2012:EUC,
author = "Robert Hirschfeld and Michael Perscheid and Michael
Haupt",
title = "Explicit use-case representation in object-oriented
programming languages",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "51--60",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047856",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Use-cases are considered an integral part of most
contemporary development processes since they describe
a software system's expected behavior from the
perspective of its prospective users. However, the
presence of and traceability to use-cases are
increasingly lost in later, more code-centric
development activities. Use-cases, being
well-encapsulated at the level of requirements
descriptions, eventually lead to crosscutting concerns
in system design and source code. Tracing which parts
of the system contribute to which use-cases is
therefore hard and so limits understandability. In this
paper, we propose an approach to making use-cases
first-class entities in both the programming language
and the runtime environment. Having use-cases present
in the code and the running system will allow
developers, maintainers, and operators to easily
associate their units of work with what matters to the
users. We suggest the combination of use-cases,
acceptance tests, and dynamic analysis to automatically
associate source code with use-cases. We present
UseCasePy, an implementation of our approach to
use-case-centered development in Python, and its
application to the Django Web framework.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
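The core idea, keeping the use-case-to-code mapping explicit and
available in the running system, fits in a few lines of Python. The
registry below is a hypothetical sketch of ours; the actual UseCasePy
API may differ:

  from collections import defaultdict

  USE_CASES = defaultdict(set)   # use-case name -> contributing code units

  def contributes_to(use_case, func):
      # Record that func contributes to use_case and hand it back
      # unchanged, so tools can query the mapping at run time.
      USE_CASES[use_case].add(func.__qualname__)
      return func

  def checkout_cart(cart):       # invented example function
      return sum(cart.values())

  checkout_cart = contributes_to("Place order", checkout_cart)

  print(USE_CASES["Place order"])   # {'checkout_cart'}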
@Article{Chevalier-Boisvert:2012:BSH,
author = "Maxime Chevalier-Boisvert and Erick Lavoie and Marc
Feeley and Bruno Dufour",
title = "Bootstrapping a self-hosted research virtual machine
for {JavaScript}: an experience report",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "61--72",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047858",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "JavaScript is one of the most widely used dynamic
languages. The performance of existing JavaScript VMs,
however, is lower than that of VMs for static
languages. There is a need for a research VM to easily
explore new implementation approaches. This paper
presents the Tachyon JavaScript VM which was designed
to be flexible and to allow experimenting with new
approaches for the execution of JavaScript. The Tachyon
VM is itself implemented in JavaScript and currently
supports a subset of the full language that is
sufficient to bootstrap itself. The paper discusses the
architecture of the system and in particular the
bootstrapping of a self-hosted VM. Preliminary
performance results indicate that our VM, with few
optimizations, can already execute code faster than a
commercial JavaScript interpreter on some benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
@Article{Klock:2012:BLR,
author = "Felix S. {Klock II} and William D. Clinger",
title = "Bounded-latency regional garbage collection",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "73--84",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047859",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Regional garbage collection is scalable, with
theoretical worst-case bounds for GC latency, MMU, and
throughput that are independent of mutator behavior and
the volume of reachable storage. Regional collection
improves upon the worst-case pause times and MMU seen
in most other general-purpose collectors, including
garbage-first and concurrent mark\slash sweep
collectors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
@Article{Tew:2012:PAM,
author = "Kevin Tew and James Swaine and Matthew Flatt and
Robert Bruce Findler and Peter Dinda",
title = "{Places}: adding message-passing parallelism to
{Racket}",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "85--96",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047860",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Places bring new support for message-passing
parallelism to Racket. This paper gives an overview of
the programming model and how we had to modify our
existing, sequential runtime system to support places.
We show that the freedom to design the programming
model helped us to make the implementation tractable;
specifically, we avoided the conventional pain of
adding just the right amount of locking to a big,
legacy runtime system. The paper presents an evaluation
of the design that includes both a real-world
application and standard parallel benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
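A place is, in essence, a separate instance of the runtime that shares
nothing with its parent and communicates only over channels. A rough
Python analogy of ours (nothing here is Racket's actual API) uses one
process per place and queues as channels:

  from multiprocessing import Process, Queue

  def place_main(inbox, outbox):
      # Runs in its own process: a private heap and no shared mutable
      # state; all communication happens through the two channels.
      n = inbox.get()
      outbox.put(sum(i * i for i in range(n)))

  if __name__ == "__main__":
      inbox, outbox = Queue(), Queue()
      place = Process(target=place_main, args=(inbox, outbox))
      place.start()
      inbox.put(10_000)          # send work to the place
      print(outbox.get())        # receive its answer
      place.join()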
@Article{Stuchlik:2012:SVD,
author = "Andreas Stuchlik and Stefan Hanenberg",
title = "Static vs. dynamic type systems: an empirical study
about the relationship between type casts and
development time",
journal = j-SIGPLAN,
volume = "47",
number = "2",
pages = "97--106",
month = feb,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2168696.2047861",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Apr 20 17:34:09 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Static type systems are essential in computer science.
However, there is hardly any knowledge about the impact
of type systems on the resulting piece of software.
While some authors state that static types increase
development speed, others argue the opposite. A
previous experiment suggests that multiple factors play
a role in a comparison of statically and dynamically
typed languages. As a follow-up, this paper presents an
empirical study with 21 subjects that compares
programming tasks performed in Java and Groovy ---
programming tasks where the number of expected type
casts varies in the statically typed language. The
result of the study is that the dynamically typed group
solved the complete programming tasks significantly
faster for most tasks --- but that for larger tasks
with a higher number of type casts no significant
difference could be found.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '11 conference proceedings.",
}
@Article{Schultz:2012:MCP,
author = "Ulrik P. Schultz",
title = "Multilingual component programming in {Racket}",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "1--2",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047864",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the world of Racket, software systems consist of
inter-operating components in different programming
languages. A component's implementation language may
provide the full functionality of Racket, or it may
support a small domain-specific notation. Naturally,
Racketeers construct languages as Racket components and
compose them to create new languages. This talk will
present the ideas behind Racket: language-specific
components, the composition of components, and, most
importantly, the rich support for building languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Rosenmuller:2012:TDS,
author = "Marko Rosenm{\"u}ller and Norbert Siegmund and Mario
Pukall and Sven Apel",
title = "Tailoring dynamic software product lines",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "3--12",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047866",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software product lines (SPLs) and adaptive systems aim
at variability to cope with changing requirements.
Variability can be described in terms of features,
which are central for development and configuration of
SPLs. In traditional SPLs, features are bound
statically before runtime. By contrast, adaptive
systems support feature binding at runtime and are
sometimes called dynamic SPLs (DSPLs). DSPLs are
usually built from coarse-grained components, which
reduces the number of possible application scenarios.
To overcome this limitation, we closely integrate
static binding of traditional SPLs and runtime
adaptation of DSPLs. We achieve this integration by
statically generating a tailor-made DSPL from a highly
customizable SPL. The generated DSPL provides only the
runtime variability required by a particular
application scenario and the execution environment. The
DSPL supports self-configuration based on
coarse-grained modules. We provide a feature-based
adaptation mechanism that reduces the effort of
computing an optimal configuration at runtime. In a
case study, we demonstrate the practicability of our
approach and show that a seamless integration of static
binding and runtime adaptation reduces the complexity
of the adaptation process.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Batory:2012:FIP,
author = "Don Batory and Peter H{\"o}fner and Jongwook Kim",
title = "Feature interactions, products, and composition",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "13--22",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047867",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The relationship between feature modules and feature
interactions is not well understood. To explain classic
examples of feature interaction, we show that features
are not only composed sequentially, but also by
cross-product and interaction operations that
heretofore were implicit in the literature. Using the
Colored IDE (CIDE) tool as our starting point, we (a)
present a formal model of these operations, (b) show
how it connects and explains previously unrelated
results in Feature Oriented Software Development
(FOSD), and (c) describe a tool, based on our
formalism, that demonstrates how changes in composed
documents can be back-propagated to their original
feature module definitions, thereby improving FOSD
tooling.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Ribeiro:2012:IFD,
author = "M{\'a}rcio Ribeiro and Felipe Queiroz and Paulo Borba
and T{\'a}rsis Tol{\^e}do and Claus Brabrand and
S{\'e}rgio Soares",
title = "On the impact of feature dependencies when maintaining
preprocessor-based software product lines",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "23--32",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047868",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "During Software Product Line (SPL) maintenance tasks,
Virtual Separation of Concerns (VSoC) allows the
programmer to focus on one feature and hide the others.
However, since features depend on each other through
variables and control-flow, feature modularization is
compromised since the maintenance of one feature may
break another. In this context, emergent interfaces can
capture dependencies between the feature we are
maintaining and the others, making developers aware of
dependencies. To better understand the impact of code
level feature dependencies during SPL maintenance, we
have investigated the following two questions: how
often do methods with preprocessor directives contain
feature dependencies? How do feature dependencies
impact maintenance effort when using VSoC and emergent
interfaces? Answering the former is important for
assessing how often we may face feature dependency
problems. Answering the latter is important to better
understand to what extent emergent interfaces
complement VSoC during maintenance tasks. To answer
them, we analyze 43 SPLs of different domains, sizes,
and languages. The data we collect from them complement
previous work on preprocessor usage. They reveal that
the feature dependencies we consider in this paper are
reasonably common in practice; and that emergent
interfaces can reduce maintenance effort during the SPL
maintenance tasks we regard here.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Neves:2012:ISE,
author = "La{\'\i}s Neves and Leopoldo Teixeira and
Dem{\'o}stenes Sena and Vander Alves and Uir{\'a}
Kulesza and Paulo Borba",
title = "Investigating the safe evolution of software product
lines",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "33--42",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047869",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The adoption of a product line strategy can bring
significant productivity and time to market
improvements. However, evolving a product line is risky
because it might impact many products and their users.
So when evolving a product line to introduce new
features or to improve its design, it is important to
make sure that the behavior of existing products is not
affected. In fact, to preserve the behavior of existing
products one usually has to analyze different
artifacts, like feature models, configuration knowledge
and the product line core assets. To better understand
this process, in this paper we discover and analyze
concrete product line evolution scenarios and, based on
the results of this study, we describe a number of safe
evolution templates that developers can use when
working with product lines. For each template, we show
examples of their use in existing product lines. We
evaluate the templates by also analyzing the evolution
history of two different product lines and
demonstrating that they can express the corresponding
modifications and then help to avoid the mistakes that
we identified during our analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Hannousse:2012:SAA,
author = "Abdelhakim Hannousse and R{\'e}mi Douence and Gilles
Ardourel",
title = "Static analysis of aspect interaction and composition
in component models",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "43--52",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047871",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Component based software engineering and aspect
orientation are claimed to be two complementary
approaches. While the former ensures the modularity and
the reusability of software entities, the latter
enables the modularity of crosscutting concerns that
cannot be modularized as regular components. Nowadays,
several approaches and frameworks are dedicated to
integrating aspects into component models. However,
when several aspects are woven, they may interact with
each other, which often results in undesirable behavior.
The contribution of this paper is twofold. First, we
show how aspectized component models can be formally
modeled in the UPPAAL model checker in order to detect
negative interactions (a.k.a., interferences) among
aspects. Second, we provide an extendible catalog of
composition operators used for aspect composition. We
illustrate our general approach with an airport
Internet service example.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Otte:2012:ICB,
author = "William R. Otte and Aniruddha Gokhale and Douglas C.
Schmidt and Johnny Willemsen",
title = "Infrastructure for component-based {DDS} application
development",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "53--62",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047872",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Enterprise distributed real-time and embedded (DRE)
systems are increasingly being developed with the use
of component-based software techniques. Unfortunately,
commonly used component middleware platforms provide
limited support for event-based publish/subscribe
(pub/sub) mechanisms that meet both quality-of-service
(QoS) and configurability requirements of DRE systems.
On the other hand, although pub/sub technologies, such
as OMG Data Distribution Service (DDS), support a wide
range of QoS settings, the level of abstraction they
provide makes it hard to configure them due to the
significant source-level configuration that must be
hard-coded at compile time or tailored at run-time
using proprietary, ad hoc configuration logic.
Moreover, developers of applications using native
pub/sub technologies must write large amounts of
boilerplate ``glue'' code to support run-time
configuration of QoS properties, which is tedious and
error-prone. This paper describes a novel, generative
approach that combines the strengths of QoS-enabled
pub/sub middleware with component-based middleware
technologies. In particular, this paper describes the
design and implementation of DDS4CIAO which addresses a
number of inherent and accidental complexities in the
DDS4CCM standard. DDS4CIAO simplifies the development,
deployment, and configuration of component-based DRE
systems that leverage DDS's powerful QoS capabilities
by provisioning DDS QoS policy settings and simplifying
the development of DDS applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Li:2012:GGP,
author = "Yulin Li and Gordon S. {Novak, Jr.}",
title = "Generation of geometric programs specified by
diagrams",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "63--72",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047874",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The GeoGram system [21] generates programs for
geometric computations by combining generic software
components as specified by diagrams constructed using a
graphical interface. The user specifies known and
desired quantities. As diagrams are constructed, the
system maintains symbolic geometric facts describing
the construction. Inferences based on the diagram are
used to derive new facts and to introduce new objects
based on geometric reasoning, to filter choices
presented to the user, to interpret the user's
intention in ambiguous cases, to detect
over-specification, and to generate the program. A
knowledge base of descriptions of generic software
components is used to prove that features of the
geometry can be computed from known values. These local
proofs are combined to guide generation of a program
that computes the desired values from inputs. The
library of generic geometric program components is used
to generate both in-line code and specialized
subroutines; partial evaluation improves the efficiency
of the generated code. The resulting program is
automatically translated into the desired language. The
program can also be run interactively to simulate the
geometry by generating graphical traces on the diagram
as input quantities are varied.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Steck:2012:MDE,
author = "Andreas Steck and Alex Lotz and Christian Schlegel",
title = "Model-driven engineering and run-time model-usage in
service robotics",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "73--82",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047875",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The development of service robots has gained more and
more attention in recent years. A major challenge
on the way towards industrial-strength service robotic
systems is to make the step from code-driven to
model-driven engineering. In this work we propose to
put models into the focus of the whole life-cycle of
robotic systems covering design-time as well as
run-time. We describe how to explicate parameters,
properties and resource information in the models at
design-time and how the run-time system of the robot
takes this information into account to support
its decision making process. We underpin our work by an
exhaustive real-world example which is completely
developed with our tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Vermolen:2012:GDM,
author = "Sander Dani{\"e}l Vermolen and Guido Wachsmuth and
Eelco Visser",
title = "Generating database migrations for evolving {Web}
applications",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "83--92",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047876",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "WebDSL is a domain-specific language for the
implementation of dynamic web applications with a rich
data model. It provides developers with object-oriented
data modeling concepts but abstracts over
implementation details for persisting application data
in relational databases. When the underlying data model
of an application evolves, persisted application data
has to be migrated. While implementing migration at the
database level breaks the abstractions provided by
WebDSL, an implementation at the data model level
requires intermingling migration with application
code. In this paper, we present a domain-specific
language for the coupled evolution of data models and
application data. It allows specifying data model
evolution as a separate concern at the data model level
and can be compiled to migration code at the database
level. Its linguistic integration with WebDSL enables
static checks for evolution validity and correctness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
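The coupled-evolution idea, stating a data-model change once and
compiling it to database-level migration code, can be sketched as
follows. The mini-notation and the SQL dialect are invented for the
illustration; this is not WebDSL's actual migration language:

  def compile_migration(evolution, table):
      # evolution: list of ("add", field, sqltype)
      #                 | ("rename", old, new)
      #                 | ("remove", field)
      stmts = []
      for step in evolution:
          if step[0] == "add":
              stmts.append(f"ALTER TABLE {table} ADD COLUMN {step[1]} {step[2]};")
          elif step[0] == "rename":
              stmts.append(f"ALTER TABLE {table} RENAME COLUMN {step[1]} TO {step[2]};")
          elif step[0] == "remove":
              stmts.append(f"ALTER TABLE {table} DROP COLUMN {step[1]};")
      return stmts

  print("\n".join(compile_migration(
      [("rename", "name", "title"), ("add", "created", "TIMESTAMP")],
      "page")))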
@Article{Danvy:2012:PFS,
author = "Olivier Danvy",
title = "Pragmatics for formal semantics",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "93--94",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047878",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This tech talk describes how to write and how to
inter-derive formal semantics for sequential
programming languages. The progress reported here is
(1) concrete guidelines to write each formal semantics
to alleviate their proof obligations, and (2) simple
calculational tools to obtain a formal semantics from
another.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Shubert:2012:AMB,
author = "Gary J. Shubert",
title = "Application of model based development to flexible
code generation",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "95--96",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047880",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This address will present the author's views and
perspectives on the past, present and future use of
model based development techniques to enable the
automated generation of source code and other forms of
programming. This address will discuss past and present
use of model based development and automated code
generation at Lockheed Martin, with special emphasis on
NASA's Orion Multi-Purpose Crew Vehicle Program. This
address will discuss the advantages and disadvantages
associated with the current state-of-the-practice
techniques and tools used to automatically generate
source code from general purpose and domain specific
models. This address will discuss the obstacles and
enablers associated with achieving the desired future
state of complete and efficient automated generation of
programming through transformation of general purpose
and domain specific models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Asai:2012:RDS,
author = "Kenichi Asai",
title = "Reflection in direct style",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "97--106",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047882",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A reflective language enables us to access, inspect,
and/or modify the language semantics from within the
same language framework. Although the degree of
semantics exposure differs from one language to
another, the most powerful approach, referred to as
behavioral reflection, exposes the entire language
semantics (or the language interpreter) that defines
the behavior of user programs for user
inspection/modification. In this paper, we deal with
behavioral reflection in the context of the
functional language Scheme. In particular, we show how
to construct a reflective interpreter where user
programs are interpreted by the tower of metacircular
interpreters and have the ability to change any parts
of the interpreters during execution. Its distinctive
feature compared to the previous work is that the
metalevel interpreters observed by users are written in
direct style. Based on the past attempt of the present
author, the current work solves the level-shifting
anomaly by defunctionalizing and inspecting the top of
the continuation frames. The resulting system enables
us to freely go up and down the levels and
access/modify the direct-style metalevel interpreter.
This is in contrast to the previous system where
metalevel interpreters were written in
continuation-passing style (CPS) and only CPS functions
could be exposed to users for modification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Nystrom:2012:FRT,
author = "Nathaniel Nystrom and Derek White and Kishen Das",
title = "{Firepile}: run-time compilation for {GPUs} in
{Scala}",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "107--116",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047883",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent advances have enabled GPUs to be used as
general-purpose parallel processors on commodity
hardware for little cost. However, the ability to
program these devices has not kept up with their
performance. The programming model for GPUs has a
number of restrictions that make it difficult to
program. For example, software running on the GPU
cannot perform dynamic memory allocation, requiring the
programmer to pre-allocate all memory the GPU might
use. To achieve good performance, GPU programmers must
also be aware of how data is moved between host and GPU
memory and between the different levels of the GPU
memory hierarchy. We describe Firepile, a library for
GPU programming in Scala. The library enables a subset
of Scala to be executed on the GPU. Code trees can be
created from run-time function values, which can then
be analyzed and transformed to generate GPU code. A key
property of this mechanism is that it is modular:
unlike with other meta-programming constructs, the use
of code trees need not be exposed in the library
interface. Code trees are general and can be used by
library writers in other application domains. Our
experiments show Firepile users can achieve performance
comparable to C code targeted to the GPU with shorter,
simpler, and easier-to-understand code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
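Firepile's central mechanism is obtaining a code tree from an ordinary
run-time function value, which the library then analyzes and translates
to GPU code. Python offers a rough analogue through source reflection;
the sketch below (ours, with an invented example kernel) only recovers
and prints the tree and performs no GPU code generation:

  import ast
  import inspect

  def saxpy(a, x, y):              # invented example kernel
      return a * x + y

  # Build a code tree from the run-time function value; this is the
  # starting point for analysis and transformation.
  tree = ast.parse(inspect.getsource(saxpy))
  print(ast.dump(tree.body[0], indent=2))   # indent= needs Python 3.9+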
@Article{Esmaeilsabzali:2012:MAC,
author = "Shahram Esmaeilsabzali and Bernd Fischer and Joanne M.
Atlee",
title = "Monitoring aspects for the customization of
automatically generated code for big-step models",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "117--126",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047884",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The output of a code generator is assumed to be
correct and not usually intended to be read or
modified; yet programmers are often interested in this,
e.g., to monitor a system property. Here, we consider
code customization for a family of code generators
associated with big-step executable modelling languages
(e.g., statecharts). We introduce a customization
language that allows us to express customization
scenarios for the generated code independently of a
specific big-step execution semantics. These
customization scenarios are all different forms of
runtime monitors, which lend themselves to a
principled, uniform implementation for observation and
code extension. A monitor is given in terms of the
enabledness and execution of the transitions of a model
and a reachability relation between two states of the
execution of the model during a big step. For each
monitor, we generate the aspect code that is
incorporated into the output of a code generator to
implement the monitor at the generated-code level.
Thus, we provide means for code analysis using
the vocabulary of a model, rather than the details of
the generated code. Our technique requires code
generators to reveal only limited information
about their code generation mechanisms, and also keeps
the structure of the generated code intact. We
demonstrate how various useful properties of a model,
or a language, can be checked using our monitors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Lindeman:2012:DDD,
author = "Ricky T. Lindeman and Lennart C. L. Kats and Eelco
Visser",
title = "Declaratively defining domain-specific language
debuggers",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "127--136",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047885",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Tool support is vital to the effectiveness of
domain-specific languages. With language workbenches,
domain-specific languages and their tool support can be
generated from a combined, high-level specification.
This paper shows how such a specification can be
extended to describe a debugger for a language. To
realize this, we introduce a meta-language for
coordinating the debugger that abstracts over the
complexity of writing a debugger by hand. We describe
the implementation of a language-parametric
infrastructure for debuggers that can be instantiated
based on this specification. The approach is
implemented in the Spoofax language workbench and
validated through realistic case studies with the
Stratego transformation language and the WebDSL web
programming language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Arnoldus:2012:LMU,
author = "B. J. Arnoldus and M. G. J. van den Brand and A.
Serebrenik",
title = "Less is more: unparser-completeness of metalanguages
for template engines",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "137--146",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047887",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A code generator is a program translating an input
model into code. In this paper we focus on
template-based code generators in the context of the
model view controller architecture (MVC). The language
in which the code generator is written is known as a
metalanguage in code generation parlance. The
metalanguage should be, on the one hand, expressive
enough to be of practical value, and, on the other
hand, restricted enough to enforce the separation
between the view and the model, according to the MVC.
In this paper we advocate the notion of
unparser-complete metalanguages as providing the right
level of expressivity. An unparser-complete
metalanguage is capable of expressing an unparser, a
code generator that translates any legal abstract
syntax tree into an equivalent sentence of the
corresponding context-free language. A metalanguage not
able to express an unparser will fail to produce all
sentences belonging to the corresponding context-free
language. A metalanguage able to express more than an
unparser will also be able to implement code violating
the model/view separation. We further show that a
metalanguage with the power of a linear deterministic
tree-to-string transducer is unparser-complete.
Moreover, this metalanguage has been successfully
applied in a non-trivial case study where an existing
code generator is refactored using templates.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
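For concreteness, here is an unparser for a small arithmetic grammar of
our own choosing (the paper's formal setting is linear deterministic
tree-to-string transducers): it maps every legal abstract syntax tree
to an equivalent sentence of the corresponding context-free language:

  def unparse(node):
      # node: ("num", n) | ("add", left, right) | ("mul", left, right)
      tag = node[0]
      if tag == "num":
          return str(node[1])
      op = {"add": "+", "mul": "*"}[tag]
      return f"({unparse(node[1])} {op} {unparse(node[2])})"

  print(unparse(("add", ("num", 1), ("mul", ("num", 2), ("num", 3)))))
  # -> (1 + (2 * 3))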
@Article{Slaatten:2012:TAG,
author = "Vidar Sl{\aa}tten and Frank Alexander Kraemer and
Peter Herrmann",
title = "Towards automatic generation of formal specifications
to validate and verify reliable distributed systems: a
method exemplified by an industrial case study",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "147--156",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047888",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The validation and verification of reliable systems is
a difficult and complex task, mainly for two reasons:
First, it is difficult to precisely state which formal
properties a system needs to fulfil to be of high
quality. Second, it is complex to automatically verify
such properties, due to the size of the analysis state
space, which grows exponentially with the number of
components. We tackle these problems by a
tool-supported method which embeds application
functionality in building blocks that use UML
activities to describe their internal behaviour. To
describe their externally visible behaviour, we use a
combination of complementary interface contracts,
so-called ESMs and EESMs. In this paper, we present an
extension of the interface contracts, External
Reliability Contracts (ERCs), that capture failure
behaviour. This separation of different behavioural
aspects in separate descriptions facilitates a two-step
analysis, in which the first step is completely
automated and the second step is facilitated by an
automatic translation of the models to the input syntax
of the model checker TLC. Further, the cascade of
contracts is used to separate the work of domain and
reliability experts. The concepts are proposed with the
background of a real industry case, and we demonstrate
how the use of interface contracts leads to
significantly smaller state spaces in the analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Sobernig:2012:CCA,
author = "Stefan Sobernig and Patrick Gaubatz and Mark Strembeck
and Uwe Zdun",
title = "Comparing complexity of {API} designs: an exploratory
experiment on {DSL}-based framework integration",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "157--166",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047890",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Embedded, textual DSLs are often provided as an API
wrapped around object-oriented application frameworks
to ease framework integration. While the literature
presents claims that DSL-based application development
is beneficial, empirical evidence for this is rare. We
present the results of an experiment comparing the
complexity of three different object-oriented framework
APIs and an embedded, textual DSL. For this comparative
experiment, we implemented the same, non-trivial
application scenario using these four different APIs.
Then, we performed an Object-Points (OP) analysis,
yielding indicators for the API complexity specific to
each API variant. The main observation for our
experiment is that the embedded, textual DSL incurs the
smallest API complexity. Although the results are
exploratory, as well as limited to the given
application scenario and a single embedded DSL, our
findings can direct future empirical work. The
experiment design is applicable for similar API design
evaluations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Erdweg:2012:GLE,
author = "Sebastian Erdweg and Lennart C. L. Kats and Tillmann
Rendel and Christian K{\"a}stner and Klaus Ostermann
and Eelco Visser",
title = "Growing a language environment with editor libraries",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "167--176",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047891",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large software projects consist of code written in a
multitude of different (possibly domain-specific)
languages, which are often deeply interspersed even in
single files. While many proposals exist on how to
integrate languages semantically and syntactically, the
question of how to support this scenario in integrated
development environments (IDEs) remains open: How can
standard IDE services, such as syntax highlighting,
outlining, or reference resolving, be provided in an
extensible and compositional way, such that an open mix
of languages is supported in a single file? Based on
our library-based syntactic extension language for
Java, SugarJ, we propose to make IDEs extensible by
organizing editor services in editor libraries. Editor
libraries are libraries written in the object language,
SugarJ, and hence activated and composed through
regular import statements on a file-by-file basis. We
have implemented an IDE for editor libraries on top of
SugarJ and the Eclipse-based Spoofax language
workbench. We have validated editor libraries by
evolving this IDE into a fully-fledged and schema-aware
XML editor as well as an extensible LaTeX editor, which
we used for writing this paper.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Freeman:2012:HPH,
author = "John Freeman and Jaakko J{\"a}rvi and Wonseok Kim and
Mat Marcus and Sean Parent",
title = "Helping programmers help users",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "177--184",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047892",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "User interfaces exhibit a wide range of features that
are designed to assist users. Interaction with one
widget may trigger value changes, disabling, or other
behaviors in other widgets. Such automatic behavior may
be confusing or disruptive to users. Research
literature on user interfaces offers a number of
solutions, including interface features for explaining
or controlling these behaviors. To help programmers
help users, the implementation costs of these features
need to be much lower. Ideally, they could be generated
for free. This paper shows how several help and control
mechanisms can be implemented as algorithms and reused
across interfaces, making the cost of their adoption
negligible. Specifically, we describe generic help
mechanisms for visualizing data flow and explaining
command deactivation, and a mechanism for controlling
the flow of data. A reusable implementation of these
features is enabled by our property model framework,
where the data manipulated through a user interface is
modeled as a constraint system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
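The enabling structure is that every piece of UI data is a node in a
constraint system, so help can be computed rather than hand-written. A
toy one-way dataflow version of ours (not the authors' property model
framework) shows how the same dependency information that keeps the UI
consistent also yields an explanation of a disabled widget:

  values = {"width": 4, "height": 3}   # user-editable inputs
  constraints = {                      # derived property -> (fn, inputs)
      "area":       (lambda v: v["width"] * v["height"], ["width", "height"]),
      "ok_enabled": (lambda v: v["area"] > 0,            ["area"]),
  }

  def propagate():
      # Re-evaluate derived properties (dict order assumed topological).
      for name, (fn, _deps) in constraints.items():
          values[name] = fn(values)

  def explain(prop):
      # Trace a derived property back to the user-editable inputs that
      # feed it: the raw material for "why is this button disabled?".
      if prop not in constraints:
          return [prop]                # an input the user can edit
      _fn, deps = constraints[prop]
      return [leaf for d in deps for leaf in explain(d)]

  propagate()
  print(values["ok_enabled"], explain("ok_enabled"))
  # -> True ['width', 'height']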
@Article{Launchbury:2012:TBC,
author = "John Launchbury",
title = "Theorem-based circuit derivation in {Cryptol}",
journal = j-SIGPLAN,
volume = "47",
number = "3",
pages = "185--186",
month = mar,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2189751.2047894",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:00 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Even though step-by-step refinement has long been seen
as desirable, it is hard to find compelling industrial
applications of the technique. In theory, transforming
a high-level specification into a high-performance
implementation is an ideal means of producing a correct
design, but in practice it is hard to make it work, and
even harder to make it worthwhile. This talk describes
an exception. We introduce the domain-specific
language, Cryptol, and work up to a design experience
in which theorem-based refinement played a crucial role
in producing an industrial quality FPGA encryptor and
decryptor for AES. Quite simply, we are unlikely to
have succeeded without the technique. The Cryptol
specification language was designed by Galois for the
NSA as a public standard for specifying cryptographic
algorithms. A Cryptol reference specification can serve
as the formal documentation for a cryptographic module,
eliminating the need for separate and voluminous
English descriptions. Cryptol is fully executable,
allowing designers to experiment with their programs
incrementally as their designs evolve. Cryptol
compilers can generate C, C++, and Haskell software
implementations, and VHDL or Verilog HDL hardware
implementations. These generators can significantly
reduce overall life-cycle costs of cryptographic
solutions. For example, Cryptol allows engineers and
mathematicians to program cryptographic algorithms on
FPGAs as if they were writing software. The design
experience we describe runs as follows: we begin with a
specification for AES written in Cryptol, and over a
series of five design stages we produce an industrial
grade encrypt core. In each stage, we state theorems
which relate the component behaviors in one stage with
the corresponding behaviors in the refinement. The
resulting cores, running at 350--440 MHz depending on
the FPGA part, bear little relationship to the
original, except that the step-by-step theorems ensured
we had not gone astray. We then repeat the pattern in
generating a circuit for AES decrypt. While there are
many similarities between encrypt and decrypt in AES,
there are some crucial differences with regard to high
performance. The first concerns the generation of key
material. The AES key is used as a seed for a specific
pseudo-random number generator which produces key
material for use in each of the AES rounds. For
encrypt, the key-generator runs in sync with the action
of encryption, so may be scheduled alongside it. For
decrypt, they run counter to one another, creating a
major challenge to be overcome. The second is that the
generated key material has an additional transformation applied
to it, which occurs deep in the middle of the high
performing core. Using theorems as stepping stones
along the way, we redesign the key expansion algorithm
so that it will run in sync with the decryption. We
then trace parallel steps to the derivation of encrypt,
establishing a series of commuting diagrams along the
way. Whenever we confronted bugs in the development
process, we produced many theorems to isolate the bugs,
using theorems as a principled kind of printf. When the
bugs were found and eradicated, we elided many of the
temporary theorems, leaving behind those that provided
important insights into the behavior of the code. This
talk is a story of the journey with demonstrations of
the tool at work. Its ultimate message is to highlight
the value of including a theorem facility within purely
functional domain-specific languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '11 conference proceedings.",
}
@Article{Larus:2012:CWC,
author = "James R. Larus",
title = "The cloud will change everything",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "1--2",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950367",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cloud computing is fast on its way to becoming a
meaningless, oversold marketing slogan. In the midst of
this hype, it is easy to overlook the fundamental
change that is occurring. Computation, which used to be
confined to the machine beside your desk, is
increasingly centralized in vast shared facilities and
at the same time liberated by battery-powered, wireless
devices. Performance, security, and reliability are no
longer problems that can be considered in isolation ---
the wires and software connecting the pieces offer more
challenges and opportunities than the components
themselves. The eXtreme Computing Group (XCG) in
Microsoft Research is taking a holistic approach to
research in this area, by bringing together researchers
and developers with expertise in data center design,
computer architecture, operating systems, computer
security, programming language, mobile computation, and
user interfaces to tackle the challenges of cloud
computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Yuan:2012:ISD,
author = "Ding Yuan and Jing Zheng and Soyeon Park and Yuanyuan
Zhou and Stefan Savage",
title = "Improving software diagnosability via log
enhancement",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "3--14",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950369",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Diagnosing software failures in the field is
notoriously difficult, in part due to the fundamental
complexity of trouble-shooting any complex software
system, but further exacerbated by the paucity of
information that is typically available in the
production setting. Indeed, for reasons of both
overhead and privacy, it is common that only the
run-time log generated by a system (e.g., syslog) can
be shared with the developers. Unfortunately, the
ad hoc nature of such reports is frequently
insufficient for detailed failure diagnosis. This paper
seeks to improve this situation within the rubric of
existing practice. We describe a tool, LogEnhancer, that
automatically ``enhances'' existing logging code to aid
in future post-failure debugging. We evaluate
LogEnhancer on eight large, real-world applications and
demonstrate that it can dramatically reduce the set of
potential root failure causes that must be considered
during diagnosis while imposing negligible overheads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Veeraraghavan:2012:DPS,
author = "Kaushik Veeraraghavan and Dongyoon Lee and Benjamin
Wester and Jessica Ouyang and Peter M. Chen and Jason
Flinn and Satish Narayanasamy",
title = "{DoublePlay}: parallelizing sequential logging and
replay",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "15--26",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950370",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Deterministic replay systems record and reproduce the
execution of a hardware or software system. In contrast
to replaying execution on uniprocessors, deterministic
replay on multiprocessors is very challenging to
implement efficiently because of the need to reproduce
the order or values read by shared memory operations
performed by multiple threads. In this paper, we
present DoublePlay, a new way to efficiently guarantee
replay on commodity multiprocessors. Our key insight is
that one can use the simpler and faster mechanisms of
single-processor record and replay, yet still achieve
the scalability offered by multiple cores, by using an
additional execution to parallelize the record and
replay of an application. DoublePlay timeslices
multiple threads on a single processor, then runs
multiple time intervals (epochs) of the program
concurrently on separate processors. This strategy,
which we call uniparallelism, makes logging much easier
because each epoch runs on a single processor (so
threads in an epoch never simultaneously access the
same memory) and different epochs operate on different
copies of the memory. Thus, rather than logging the
order of shared-memory accesses, we need only log the
order in which threads in an epoch are timesliced on
the processor. DoublePlay runs an additional execution
of the program on multiple processors to generate
checkpoints so that epochs run in parallel. We evaluate
DoublePlay on a variety of client, server, and
scientific parallel benchmarks; with spare cores,
DoublePlay reduces logging overhead to an average of
15\% with two worker threads and 28\% with four
threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
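The key saving in DoublePlay's uniparallelism is that an epoch's threads
share one processor, so only the timeslice schedule needs logging. A toy C
sketch of recording and replaying such a schedule (the "scheduler" and the
epoch machinery are invented stand-ins, not DoublePlay's):

/* Toy sketch: within a uniparallel epoch, logging which thread held
 * each timeslice is enough to replay every shared-memory
 * interleaving. Checkpointing and the parallel recording run are
 * elided; the scheduling decision here is an invented round robin. */
#include <stdio.h>

#define THREADS 3
#define SLICES  6

int main(void)
{
    int schedule_log[SLICES];

    /* Record: the epoch runs on one CPU; log only the schedule. */
    for (int s = 0; s < SLICES; s++)
        schedule_log[s] = s % THREADS;   /* toy scheduling decision */

    /* Replay: dispatch threads in exactly the logged order. */
    for (int s = 0; s < SLICES; s++)
        printf("slice %d -> thread %d\n", s, schedule_log[s]);
    return 0;
}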
@Article{Casper:2012:HAT,
author = "Jared Casper and Tayo Oguntebi and Sungpack Hong and
Nathan G. Bronson and Christos Kozyrakis and Kunle
Olukotun",
title = "Hardware acceleration of transactional memory on
commodity systems",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "27--38",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950372",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The adoption of transactional memory is hindered by
the high overhead of software transactional memory and
the intrusive design changes required by previously
proposed TM hardware. We propose that hardware to
accelerate software transactional memory (STM) can
reside outside an unmodified commodity processor core,
thereby substantially reducing implementation costs.
This paper introduces Transactional Memory Acceleration
using Commodity Cores (TMACC), a hardware-accelerated
TM system that does not modify the processor, caches,
or coherence protocol. We present a complete hardware
implementation of TMACC using a rapid prototyping
platform. Using this hardware, we implement two unique
conflict detection schemes which are accelerated using
Bloom filters on an FPGA. These schemes employ novel
techniques for tolerating the latency of fine-grained
asynchronous communication with an out-of-core
accelerator. We then conduct experiments to explore the
feasibility of accelerating TM without modifying
existing system hardware. We show that, for all but
short transactions, it is not necessary to modify the
processor to obtain substantial improvement in TM
performance. In these cases, TMACC outperforms an STM
by an average of 69\% in applications using
moderate-length transactions, showing maximum speedup
within 8\% of an upper bound on TM acceleration.
Overall, we demonstrate that hardware can substantially
accelerate the performance of an STM on unmodified
commodity processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
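TMACC's conflict detection hinges on Bloom-filter signatures of
transactional read and write sets. A minimal software analogue is sketched
below; the hash functions and filter size are toy choices, nothing like
the paper's FPGA implementation:

/* Minimal software analogue of Bloom-filter conflict detection
 * between one transaction's read set and another's write set. A hit
 * may be a false positive (forcing a needless abort), but a miss is
 * guaranteed conflict-free. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FILTER_BITS 1024

typedef struct { uint8_t bits[FILTER_BITS / 8]; } bloom_t;

static uint32_t hash1(uintptr_t a) { return (uint32_t)(a * 2654435761u) % FILTER_BITS; }
static uint32_t hash2(uintptr_t a) { return (uint32_t)((a >> 4) * 40503u) % FILTER_BITS; }

static void bloom_add(bloom_t *f, uintptr_t addr)
{
    uint32_t h1 = hash1(addr), h2 = hash2(addr);
    f->bits[h1 / 8] |= 1u << (h1 % 8);
    f->bits[h2 / 8] |= 1u << (h2 % 8);
}

static int bloom_maybe_contains(const bloom_t *f, uintptr_t addr)
{
    uint32_t h1 = hash1(addr), h2 = hash2(addr);
    return (f->bits[h1 / 8] >> (h1 % 8) & 1) &&
           (f->bits[h2 / 8] >> (h2 % 8) & 1);
}

int main(void)
{
    bloom_t writes_txA;
    memset(&writes_txA, 0, sizeof writes_txA);

    int x, y;
    bloom_add(&writes_txA, (uintptr_t)&x);   /* tx A wrote x */

    /* Tx B validates its reads against A's write signature. */
    printf("read x conflicts? %d\n", bloom_maybe_contains(&writes_txA, (uintptr_t)&x));
    printf("read y conflicts? %d\n", bloom_maybe_contains(&writes_txA, (uintptr_t)&y));
    return 0;
}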
@Article{Dalessandro:2012:HNC,
author = "Luke Dalessandro and Fran{\c{c}}ois Carouge and Sean
White and Yossi Lev and Mark Moir and Michael L. Scott
and Michael F. Spear",
title = "Hybrid {NOrec}: a case study in the effectiveness of
best effort hardware transactional memory",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "39--52",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950373",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactional memory (TM) is a promising
synchronization mechanism for the next generation of
multicore processors. Best-effort Hardware
Transactional Memory (HTM) designs, such as Sun's
prototype Rock processor and AMD's proposed Advanced
Synchronization Facility (ASF), can efficiently execute
many transactions, but abort in some cases due to
various limitations. Hybrid TM systems can use a
compatible software TM (STM) in such cases. We
introduce a family of hybrid TMs built using the recent
NOrec STM algorithm that, unlike existing hybrid
approaches, provide both low overhead on hardware
transactions and concurrent execution of hardware and
software transactions. We evaluate implementations for
Rock and ASF, exploring how the differing HTM designs
affect optimization choices. Our investigation yields
valuable input for designers of future best-effort
HTMs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Singh:2012:EPS,
author = "Abhayendra Singh and Daniel Marino and Satish
Narayanasamy and Todd Millstein and Madan Musuvathi",
title = "Efficient processor support for {DRFx}, a memory model
with exceptions",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "53--66",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950375",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A longstanding challenge of shared-memory concurrency
is to provide a memory model that allows for efficient
implementation while providing strong and simple
guarantees to programmers. The C++0x and Java memory
models admit a wide variety of compiler and hardware
optimizations and provide sequentially consistent (SC)
semantics for data-race-free programs. However, they
either do not provide any semantics (C++0x) or provide
a hard-to-understand semantics (Java) for racy
programs, compromising the safety and debuggability of
such programs. In earlier work we proposed the DRFx
memory model, which addresses this problem by
dynamically detecting potential violations of SC due to
the interaction of compiler or hardware optimizations
with data races and halting execution upon detection.
In this paper, we present a detailed micro-architecture
design for supporting the DRFx memory model, formalize
the design and prove its correctness, and evaluate the
design using a hardware simulator. We describe a set of
DRFx-compliant complexity-effective optimizations which
allow us to attain performance close to that of TSO
(Total Store Order) and DRF0 while providing strong
guarantees for all programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Devietti:2012:RRC,
author = "Joseph Devietti and Jacob Nelson and Tom Bergan and
Luis Ceze and Dan Grossman",
title = "{RCDC}: a relaxed consistency deterministic computer",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "67--78",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950376",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Providing deterministic execution significantly
simplifies the debugging, testing, replication, and
deployment of multithreaded programs. Recent work has
developed deterministic multiprocessor architectures as
well as compiler and runtime systems that enforce
determinism in current hardware. Such work has
incidentally imposed strong memory-ordering properties.
Historically, memory ordering has been relaxed in favor
of higher performance in shared memory multiprocessors
and, interestingly, determinism exacerbates the cost of
strong memory ordering. Consequently, we argue that
relaxed memory ordering is vital to achieving faster
deterministic execution. This paper introduces RCDC, a
deterministic multiprocessor architecture that takes
advantage of relaxed memory orderings to provide
high-performance deterministic execution with low
hardware complexity. RCDC has two key innovations: a
hybrid HW/SW approach to enforcing determinism; and a
new deterministic execution strategy that leverages
data-race-free-based memory models (e.g., the models
for Java and C++) to improve performance and
scalability without sacrificing determinism, even in
the presence of races. In our hybrid HW/SW approach,
the only hardware mechanisms required are
software-controlled store buffering and support for
precise instruction counting; we do not require
speculation. A runtime system uses these mechanisms to
enforce determinism for arbitrary programs. We evaluate
RCDC using PARSEC benchmarks and show that relaxing
memory ordering leads to performance and scalability
close to nondeterministic execution without requiring
any form of speculation. We also compare our new
execution strategy to one based on TSO
(total-store-ordering) and show that some applications
benefit significantly from the extra relaxation. We
also evaluate a software-only implementation of our new
deterministic execution strategy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Burnim:2012:SCS,
author = "Jacob Burnim and George Necula and Koushik Sen",
title = "Specifying and checking semantic atomicity for
multithreaded programs",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "79--90",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950377",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In practice, it is quite difficult to write correct
multithreaded programs due to the potential for
unintended and nondeterministic interference between
parallel threads. A fundamental correctness property
for such programs is atomicity---a block of code in a
program is atomic if, for any parallel execution of the
program, there is an execution with the same overall
program behavior in which the block is executed
serially. We propose semantic atomicity, a
generalization of atomicity with respect to a
programmer-defined notion of equivalent behavior. We
propose an assertion framework in which a programmer
can use bridge predicates to specify noninterference
properties at the level of abstraction of their
application. Further, we propose a novel algorithm for
systematically testing atomicity specifications on
parallel executions with a bounded number of
interruptions---i.e., atomic blocks whose execution is
interleaved with that of other threads. We further
propose a set of sound heuristics and optional user
annotations that increase the efficiency of checking
atomicity specifications in the common case where the
specifications hold. We have implemented our assertion
framework for specifying and checking semantic
atomicity for parallel Java programs, and we have
written semantic atomicity specifications for a number
of benchmarks. We found that using bridge predicates
allowed us to specify the natural and intended atomic
behavior of a wider range of programs than did previous
approaches. Further, in checking our specifications, we
found several previously unknown bugs, including in the
widely-used java.util.concurrent library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Volos:2012:MLP,
author = "Haris Volos and Andres Jaan Tack and Michael M.
Swift",
title = "{Mnemosyne}: lightweight persistent memory",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "91--104",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950379",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "New storage-class memory (SCM) technologies, such as
phase-change memory, STT-RAM, and memristors, promise
user-level access to non-volatile storage through
regular memory instructions. These memory devices
enable fast user-mode access to persistence, allowing
regular in-memory data structures to survive system
crashes. In this paper, we present Mnemosyne, a simple
interface for programming with persistent memory.
Mnemosyne addresses two challenges: how to create and
manage such memory, and how to ensure consistency in
the presence of failures. Without additional
mechanisms, a system failure may leave data structures
in SCM in an invalid state, crashing the program the
next time it starts. In Mnemosyne, programmers declare
global persistent data with the keyword ``pstatic'' or
allocate it dynamically. Mnemosyne provides primitives
for directly modifying persistent variables and
supports consistent updates through a lightweight
transaction mechanism. Compared to past work on
disk-based persistent memory, Mnemosyne reduces latency
to storage by writing data directly to memory at the
granularity of an update rather than writing memory
pages back to disk through the file system. In tests
emulating the performance characteristics of
forthcoming SCMs, we show that Mnemosyne can persist
data as fast as 3 microseconds. Furthermore, it
provides a 35 percent performance increase when applied
in the OpenLDAP directory server. In microbenchmark
studies we find that Mnemosyne can be up to 1400\%
faster than alternative persistence strategies, such as
Berkeley DB or Boost serialization, that are designed
for disks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
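Mnemosyne's ``pstatic'' variables and its transaction mechanism are
compiler- and runtime-supported. As a rough user-level approximation of
the persistence half only (not the transactions), a file-backed mapping
can emulate a persistent global; the POSIX sketch below uses invented
names and stands in for real storage-class memory:

/* Crude emulation of a persistent global: a file-backed mmap region
 * whose contents survive process restarts. msync() stands in for a
 * commit point; this is not Mnemosyne's API. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    int fd = open("pstatic.img", O_RDWR | O_CREAT, 0644);
    if (fd < 0 || ftruncate(fd, sizeof(long)) != 0) { perror("backing file"); return 1; }

    long *counter = mmap(NULL, sizeof(long), PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
    if (counter == MAP_FAILED) { perror("mmap"); return 1; }

    (*counter)++;                          /* "persistent" update      */
    msync(counter, sizeof(long), MS_SYNC); /* stand-in for a commit    */

    printf("this program has run %ld time(s)\n", *counter);
    munmap(counter, sizeof(long));
    close(fd);
    return 0;
}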
@Article{Coburn:2012:NHM,
author = "Joel Coburn and Adrian M. Caulfield and Ameen Akel and
Laura M. Grupp and Rajesh K. Gupta and Ranjit Jhala and
Steven Swanson",
title = "{NV-Heaps}: making persistent objects fast and safe
with next-generation, non-volatile memories",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "105--118",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950380",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Persistent, user-defined objects present an attractive
abstraction for working with non-volatile program
state. However, the slow speed of persistent storage
(i.e., disk) has restricted their design and limited
their performance. Fast, byte-addressable, non-volatile
technologies, such as phase change memory, will remove
this constraint and allow programmers to build
high-performance, persistent data structures in
non-volatile storage that is almost as fast as DRAM.
Creating these data structures requires a system that
is lightweight enough to expose the performance of the
underlying memories but also ensures safety in the
presence of application and system failures by avoiding
familiar bugs such as dangling pointers, multiple
free()s, and locking errors. In addition, the system
must prevent new types of hard-to-find pointer safety
bugs that only arise with persistent objects. These
bugs are especially dangerous since any corruption they
cause will be permanent. We have implemented a
lightweight, high-performance persistent object system
called NV-heaps that provides transactional semantics
while preventing these errors and providing a model for
persistence that is easy to use and reason about. We
implement search trees, hash tables, sparse graphs, and
arrays using NV-heaps, BerkeleyDB, and Stasis. Our
results show that NV-heap performance scales with
thread count and that data structures implemented using
NV-heaps out-perform BerkeleyDB and Stasis
implementations by 32x and 244x, respectively, by
avoiding the operating system and minimizing other
software overheads. We also quantify the cost of
enforcing the safety guarantees that NV-heaps provide
and measure the costs of NV-heap primitive
operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Schupbach:2012:DLA,
author = "Adrian Sch{\"u}pbach and Andrew Baumann and Timothy
Roscoe and Simon Peter",
title = "A declarative language approach to device
configuration",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "119--132",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950382",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "C remains the language of choice for hardware
programming (device drivers, bus configuration, etc.):
it is fast, allows low-level access, and is trusted by
OS developers. However, the algorithms required to
configure and reconfigure hardware devices and
interconnects are becoming more complex and diverse,
with the added burden of legacy support, quirks, and
hardware bugs to work around. Even programming PCI
bridges in a modern PC is a surprisingly complex
problem, and is getting worse as new functionality such
as hotplug appears. Existing approaches use relatively
simple algorithms, hard-coded in C and closely coupled
with low-level register access code, generally leading
to suboptimal configurations. We investigate the merits
and drawbacks of a new approach: separating hardware
configuration logic (algorithms to determine
configuration parameter values) from mechanism
(programming device registers). The latter we keep in
C, and the former we encode in a declarative
programming language with constraint-satisfaction
extensions. As a test case, we have implemented full
PCI configuration, resource allocation, and interrupt
assignment in the Barrelfish research operating system,
using a concise expression of efficient algorithms in
constraint logic programming. We show that the approach
is tractable, and can successfully configure a wide
range of PCs with competitive runtime cost. Moreover,
it requires about half the code of the C-based approach
in Linux while offering considerably more
functionality. Additionally, it easily accommodates
adaptations such as hotplug, fixed regions, and
quirks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Ryzhyk:2012:IDD,
author = "Leonid Ryzhyk and John Keys and Balachandra Mirla and
Arun Raghunath and Mona Vij and Gernot Heiser",
title = "Improved device driver reliability through hardware
verification reuse",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "133--144",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950383",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Faulty device drivers are a major source of operating
system failures. We argue that the underlying cause of
many driver faults is the separation of two
highly-related tasks: device verification and driver
development. These two tasks have a lot in common, and
result in software that is conceptually and
functionally similar, yet kept totally separate. The
result is a particularly bad case of duplication of
effort: the verification code is correct, but is
discarded after the device has been manufactured; the
driver code is inferior, but used in actual device
operation. We claim that the two tasks, and the
software they produce, can and should be unified, and
this will result in drastic improvement of
device-driver quality and reduction in the development
cost and time to market. In this paper we propose a
device driver design and verification workflow that
achieves such unification. We apply this workflow to
develop and test drivers for four different I/O devices
and demonstrate that it improves the driver test
coverage and allows detecting driver defects that are
extremely hard to find using conventional testing
techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Hashmi:2012:CNI,
author = "Atif Hashmi and Andrew Nere and James Jamal Thomas and
Mikko Lipasti",
title = "A case for neuromorphic {ISAs}",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "145--158",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950385",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The desire to create novel computing systems, paired
with recent advances in neuroscientific understanding
of the brain, has led researchers to develop
neuromorphic architectures that emulate the brain. To
date, such models are developed, trained, and deployed
on the same substrate. However, excessive co-dependence
between the substrate and the algorithm prevents
portability, or at the very least requires
reconstructing and retraining the model whenever the
substrate changes. This paper proposes a well-defined
abstraction layer --- the Neuromorphic instruction set
architecture, or NISA --- that separates a neural
application's algorithmic specification from the
underlying execution substrate, and describes the Aivo
framework, which demonstrates the concrete advantages
of such an abstraction layer. Aivo consists of a NISA
implementation for a rate-encoded neuromorphic system
based on the cortical column abstraction, a
state-of-the-art integrated development and runtime
environment (IDE), and various profile-based
optimization tools. Aivo's IDE generates code for
emulating cortical networks on the host CPU, multiple
GPGPUs, or as boolean functions. Its runtime system can
deploy and adaptively optimize cortical networks in a
manner similar to conventional just-in-time compilers
in managed runtime systems (e.g., Java, C\#). We
demonstrate the abilities of the NISA abstraction by
constructing a cortical network model of the mammalian
visual cortex, deploying on multiple execution
substrates, and utilizing the various optimization
tools we have created. For this hierarchical
configuration, Aivo's profiling based network
optimization tools reduce the memory footprint by 50\%
and improve the execution time by a factor of 3x on the
host CPU. Deploying the same network on a single GPGPU
results in a 30x speedup. We further demonstrate that a
speedup of 480x can be achieved by deploying a
massively scaled cortical network across three GPGPUs.
Finally, converting a trained hierarchical network to
C/C++ boolean constructs on the host CPU results in 44x
speedup.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Ransford:2012:MSS,
author = "Benjamin Ransford and Jacob Sorber and Kevin Fu",
title = "{Mementos}: system support for long-running
computation on {RFID}-scale devices",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "159--170",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950386",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transiently powered computing devices such as RFID
tags, kinetic energy harvesters, and smart cards
typically rely on programs that complete a task under
tight time constraints before energy starvation leads
to complete loss of volatile memory. Mementos is a
software system that transforms general-purpose
programs into interruptible computations that are
protected from frequent power losses by automatic,
energy-aware state checkpointing. Mementos comprises a
collection of optimization passes for the LLVM compiler
infrastructure and a linkable library that exercises
hardware support for energy measurement while managing
state checkpoints stored in nonvolatile memory. We
evaluate Mementos against diverse test cases in a
trace-driven simulator of transiently powered
RFID-scale devices. Although Mementos's energy checks
increase run time when energy is plentiful, they allow
Mementos to safely suspend execution when energy
dwindles, effectively spreading computation across zero
or more power failures. This paper's contributions are:
a study of the runtime environment for programs on
RFID-scale devices; an energy-aware state checkpointing
system for these devices that is implemented for the
MSP430 family of microcontrollers; and a trace-driven
simulator of transiently powered RFID-scale devices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
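The core loop of an energy-aware checkpointing scheme in the spirit of
Mementos can be sketched in a few lines of C. The voltage source and
checkpoint store below are stubs with invented names; Mementos itself
instruments code through LLVM passes and targets MSP430 hardware:

/* Sketch: at a loop boundary, compare stored energy against a
 * threshold and checkpoint to nonvolatile memory before power is
 * lost. All numbers are illustrative. */
#include <stdio.h>

#define VOLTAGE_THRESHOLD 2200  /* millivolts, illustrative */

static int read_voltage_mv(void)
{
    static int v = 3000;
    return v -= 150;            /* pretend the capacitor drains */
}

static void checkpoint(int i, long acc)
{
    /* A real system writes registers + live state to flash/FRAM. */
    printf("checkpoint at i=%d acc=%ld\n", i, acc);
}

int main(void)
{
    long acc = 0;
    for (int i = 0; i < 100; i++) {
        acc += i;               /* the long-running computation */
        if (read_voltage_mv() < VOLTAGE_THRESHOLD) {
            checkpoint(i, acc); /* safe suspend point */
            break;              /* device would now power down */
        }
    }
    return 0;
}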
@Article{Koukoumidis:2012:PC,
author = "Emmanouil Koukoumidis and Dimitrios Lymberopoulos and
Karin Strauss and Jie Liu and Doug Burger",
title = "Pocket cloudlets",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "171--184",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950387",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cloud services accessed through mobile devices suffer
from high network access latencies and are constrained
by energy budgets dictated by the devices' batteries.
Radio and battery technologies will improve over time,
but are still expected to be the bottlenecks in future
systems. Non-volatile memories (NVM), however, may
continue experiencing significant and steady
improvements in density for at least ten more years. In
this paper, we propose to leverage the abundance in
memory capacity of mobile devices to mitigate latency
and energy issues when accessing cloud services. We
first analyze NVM technology scaling trends, and then
propose a cloud service cache architecture that resides
on the mobile device's NVM (pocket cloudlet). This
architecture utilizes both individual user and
community access models to maximize its hit rate, and
subsequently reduce overall service latency and energy
consumption. As a showcase we present the design,
implementation and evaluation of PocketSearch, a search
and advertisement pocket cloudlet. We perform mobile
search characterization to guide the design of
PocketSearch and evaluate it with 200 million mobile
queries from the search logs of m.bing.com. We show
that PocketSearch can serve, on average, 66\% of the
web search queries submitted by an individual user
without having to use the slow 3G link, leading to 16x
service access speedup. Finally, based on experience
with PocketSearch we provide additional insight and
guidelines on how future pocket cloudlets should be
organized, from both an architectural and an operating
system perspective.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Sharma:2012:BMS,
author = "Navin Sharma and Sean Barker and David Irwin and
Prashant Shenoy",
title = "{Blink}: managing server clusters on intermittent
power",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "185--198",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950389",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reducing the energy footprint of data centers
continues to receive significant attention due to both
its financial and environmental impact. There are
numerous methods that limit the impact of both factors,
such as expanding the use of renewable energy or
participating in automated demand-response programs. To
take advantage of these methods, servers and
applications must gracefully handle intermittent
constraints in their power supply. In this paper, we
propose blinking---metered transitions between a
high-power active state and a low-power inactive
state---as the primary abstraction for conforming to
intermittent power constraints. We design Blink, an
application-independent hardware-software platform for
developing and evaluating blinking applications, and
define multiple types of blinking policies. We then use
Blink to design BlinkCache, a blinking version of
memcached, to demonstrate the effect of blinking on an
example application. Our results show that a
load-proportional blinking policy combines the
advantages of both activation and synchronous blinking
for realistic Zipf-like popularity distributions and
wind/solar power signals by achieving near optimal hit
rates (within 15\% of an activation policy), while also
providing fairer access to the cache (within 2\% of a
synchronous policy) for equally popular objects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
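A load-proportional blinking policy reduces to simple arithmetic: scale
each node's duty cycle by its share of the workload so that total active
time tracks the power budget. The numbers and layout below are invented
for illustration, not Blink's implementation:

/* Toy load-proportional blinking: duty cycles follow per-node
 * popularity, scaled to the instantaneous power budget. */
#include <stdio.h>

#define NODES 4
#define BLINK_INTERVAL_MS 1000

int main(void)
{
    double budget = 0.6;                       /* fraction of peak power */
    double pop[NODES] = {0.4, 0.3, 0.2, 0.1};  /* per-node load share    */

    for (int n = 0; n < NODES; n++) {
        double duty = pop[n] * NODES * budget; /* shares sum to 1.0 */
        if (duty > 1.0) duty = 1.0;            /* cap at always-on  */
        printf("node %d: active %d ms of every %d ms\n",
               n, (int)(duty * BLINK_INTERVAL_MS), BLINK_INTERVAL_MS);
    }
    return 0;
}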
@Article{Hoffmann:2012:DKR,
author = "Henry Hoffmann and Stelios Sidiroglou and Michael
Carbin and Sasa Misailovic and Anant Agarwal and Martin
Rinard",
title = "Dynamic knobs for responsive power-aware computing",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "199--212",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950390",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present PowerDial, a system for dynamically
adapting application behavior to execute successfully
in the face of load and power fluctuations. PowerDial
transforms static configuration parameters into dynamic
knobs that the PowerDial control system can manipulate
to dynamically trade off the accuracy of the
computation in return for reductions in the
computational resources that the application requires
to produce its results. These reductions translate
directly into performance improvements and power
savings. Our experimental results show that PowerDial
can enable our benchmark applications to execute
responsively in the face of power caps that would
otherwise significantly impair responsiveness. They
also show that PowerDial can significantly reduce the
number of machines required to service intermittent
load spikes, enabling reductions in power and capital
costs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
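The "dynamic knob" idea boils down to a feedback loop: measure, compare
against a responsiveness target, and move a formerly static parameter up
or down. A toy control loop follows, with invented numbers; it is not
PowerDial's controller:

/* Toy dynamic-knob control loop: the knob trades accuracy for speed
 * under a simulated load spike, then recovers quality when there is
 * slack. The latency model is made up for illustration. */
#include <stdio.h>

int main(void)
{
    double target_ms = 50.0;
    int knob = 10;                       /* 10 = full quality, 1 = fastest */

    for (int step = 0; step < 8; step++) {
        /* Pretend latency grows with quality and with a load spike. */
        double load = (step >= 3 && step <= 5) ? 2.0 : 1.0;
        double latency_ms = 6.0 * knob * load;

        if (latency_ms > target_ms && knob > 1)
            knob--;                      /* trade accuracy for speed */
        else if (latency_ms < 0.8 * target_ms && knob < 10)
            knob++;                      /* spend slack on accuracy  */

        printf("step %d: latency %.0f ms, knob -> %d\n", step, latency_ms, knob);
    }
    return 0;
}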
@Article{Liu:2012:FSD,
author = "Song Liu and Karthik Pattabiraman and Thomas
Moscibroda and Benjamin G. Zorn",
title = "{Flikker}: saving {DRAM} refresh-power through
critical data partitioning",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "213--224",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950391",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy has become a first-class design constraint in
computer systems. Memory is a significant contributor
to total system power. This paper introduces Flikker,
an application-level technique to reduce refresh power
in DRAM memories. Flikker enables developers to specify
critical and non-critical data in programs and the
runtime system allocates this data in separate parts of
memory. The portion of memory containing critical data
is refreshed at the regular refresh-rate, while the
portion containing non-critical data is refreshed at
substantially lower rates. This partitioning saves
energy at the cost of a modest increase in data
corruption in the non-critical data. Flikker thus
exposes and leverages an interesting trade-off between
energy consumption and hardware correctness. We show
that many applications are naturally tolerant to errors
in the non-critical data, and in the vast majority of
cases, the errors have little or no impact on the
application's final outcome. We also find that Flikker
can save 20--25\% of the power consumed by the
memory sub-system in a mobile device, with negligible
impact on application performance. Flikker is
implemented almost entirely in software, and requires
only modest changes to the hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
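A minimal sketch of Flikker-style criticality partitioning: allocations
are steered into a "critical" arena (refreshed normally) or a
"non-critical" arena (refreshed rarely, so occasional bit flips are
tolerated). The flikker_alloc() name and arena bookkeeping are invented;
the real system partitions DRAM and needs modest hardware support:

/* Hedged sketch: an ordinary heap stands in for the two DRAM
 * partitions; only the criticality annotation is illustrated. */
#include <stdio.h>
#include <stdlib.h>

enum criticality { CRITICAL, NON_CRITICAL };

static size_t used[2];                  /* bytes per logical arena */

static void *flikker_alloc(size_t n, enum criticality c)
{
    used[c] += n;                       /* track which partition grows */
    return malloc(n);
}

int main(void)
{
    /* Decoded video frames tolerate flips; the parser state cannot. */
    void *parser_state = flikker_alloc(4096, CRITICAL);
    void *frame_buffer = flikker_alloc(2 * 1024 * 1024, NON_CRITICAL);

    printf("critical bytes: %zu, non-critical bytes: %zu\n",
           used[CRITICAL], used[NON_CRITICAL]);
    free(parser_state);
    free(frame_buffer);
    return 0;
}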
@Article{Deng:2012:MAL,
author = "Qingyuan Deng and David Meisner and Luiz Ramos and
Thomas F. Wenisch and Ricardo Bianchini",
title = "{MemScale}: active low-power modes for main memory",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "225--238",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950392",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Main memory is responsible for a large and increasing
fraction of the energy consumed by servers. Prior work
has focused on exploiting DRAM low-power states to
conserve energy. However, these states require entire
DRAM ranks to be idled, which is difficult to achieve
even in lightly loaded servers. In this paper, we
propose to conserve memory energy while improving its
energy-proportionality by creating active low-power
modes for it. Specifically, we propose MemScale, a
scheme wherein we apply dynamic voltage and frequency
scaling (DVFS) to the memory controller and dynamic
frequency scaling (DFS) to the memory channels and DRAM
devices. MemScale is guided by an operating system
policy that determines the DVFS/DFS mode of the memory
subsystem based on the current need for memory
bandwidth, the potential energy savings, and the
performance degradation that applications are willing
to withstand. Our results demonstrate that MemScale
reduces energy consumption significantly compared to
modern memory energy management approaches. We conclude
that the potential benefits of the MemScale mechanisms
and policy more than compensate for their small
hardware cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Gao:2012:TMH,
author = "Qi Gao and Wenbin Zhang and Zhezhe Chen and Mai Zheng
and Feng Qin",
title = "{2ndStrike}: toward manifesting hidden concurrency
typestate bugs",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "239--250",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950394",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrency bugs are becoming increasingly prevalent
in the multi-core era. Recently, much research has
focused on data races and atomicity violation bugs,
which are related to low-level memory accesses.
However, a large number of concurrency typestate bugs
such as ``invalid reads to a closed file from a
different thread'' are under-studied. These concurrency
typestate bugs are important yet challenging to study
since they are mostly relevant to high-level program
semantics. This paper presents 2ndStrike, a method to
manifest hidden concurrency typestate bugs in software
testing. Given a state machine describing correct
program behavior on certain object typestates,
2ndStrike profiles runtime events related to the
typestates and thread synchronization. Based on the
profiling results, 2ndStrike then identifies bug
candidates, each of which is a pair of runtime events
that would cause typestate violation if the event order
is reversed. Finally, 2ndStrike re-executes the program
with controlled thread interleaving to manifest bug
candidates. We have implemented a prototype of
2ndStrike on Linux and have illustrated our idea using
three types of concurrency typestate bugs, including
invalid file operation, invalid pointer dereference,
and invalid lock operation. We have evaluated 2ndStrike
with six real world bugs (including one previously
unknown bug) from three open-source server and desktop
programs (i.e., MySQL, Mozilla, pbzip2). Our
experimental results show that 2ndStrike can
effectively and efficiently manifest all six software
bugs, most of which are difficult or impossible to
manifest using stress testing or active testing
techniques that are based on data race/atomicity
violation. Additionally, 2ndStrike reports no false
positives, provides detailed bug reports for each
manifested bug, and can consistently reproduce the bug
after manifesting it once.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
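The typestate idea is easy to see in miniature: given "read is only legal
while open", scan a profiled trace for cross-thread (read, close) pairs
whose reversal would violate the state machine. In the hedged sketch
below, event recording and 2ndStrike's controlled re-execution are
elided, and all names are invented:

/* Sketch: flag candidate event pairs whose reordering would yield a
 * read-after-close typestate violation. */
#include <stdio.h>

typedef enum { EV_OPEN, EV_READ, EV_CLOSE } ev_kind;
typedef struct { ev_kind kind; int tid; } event;

int main(void)
{
    /* A profiled, correct interleaving: t1 reads, then t2 closes. */
    event trace[] = { {EV_OPEN, 1}, {EV_READ, 1}, {EV_CLOSE, 2} };
    int n = sizeof trace / sizeof trace[0];

    /* Candidates: cross-thread (read, close) pairs; flipping their
     * order in a re-execution would manifest the hidden bug. */
    for (int i = 0; i < n; i++)
        for (int j = i + 1; j < n; j++)
            if (trace[i].kind == EV_READ && trace[j].kind == EV_CLOSE &&
                trace[i].tid != trace[j].tid)
                printf("candidate: reorder close(t%d) before read(t%d)\n",
                       trace[j].tid, trace[i].tid);
    return 0;
}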
@Article{Zhang:2012:CDC,
author = "Wei Zhang and Junghee Lim and Ramya Olichandran and
Joel Scherpelz and Guoliang Jin and Shan Lu and Thomas
Reps",
title = "{ConSeq}: detecting concurrency bugs through
sequential errors",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "251--264",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950395",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrency bugs are caused by non-deterministic
interleavings between shared memory accesses. Their
effects propagate through data and control dependences
until they cause software to crash, hang, produce
incorrect output, etc. The lifecycle of a bug thus
consists of three phases: (1) triggering, (2)
propagation, and (3) failure. Traditional techniques
for detecting concurrency bugs mostly focus on phase
(1)--i.e., on finding certain structural patterns of
interleavings that are common triggers of concurrency
bugs, such as data races. This paper explores a
consequence-oriented approach to improving the accuracy
and coverage of state-space search and bug detection.
The proposed approach first statically identifies
potential failure sites in a program binary (i.e., it
first considers a phase (3) issue). It then uses static
slicing to identify critical read instructions that are
highly likely to affect potential failure sites through
control and data dependences (phase (2)). Finally, it
monitors a single (correct) execution of a concurrent
program and identifies suspicious interleavings that
could cause an incorrect state to arise at a critical
read and then lead to a software failure (phase (1)).
ConSeq's backwards approach, (3) $\rightarrow$ (2) $\rightarrow$ (1), provides
advantages in bug-detection coverage and accuracy but
is challenging to carry out. ConSeq makes it feasible
by exploiting the empirical observation that phases (2)
and (3) usually are short and occur within one thread.
Our evaluation on large, real-world C/C++ applications
shows that ConSeq detects more bugs than traditional
approaches and has a much lower false-positive rate.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Chipounov:2012:SPV,
author = "Vitaly Chipounov and Volodymyr Kuznetsov and George
Candea",
title = "{S2E}: a platform for in-vivo multi-path analysis of
software systems",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "265--278",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950396",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents S2E, a platform for analyzing the
properties and behavior of software systems. We
demonstrate S2E's use in developing practical tools for
comprehensive performance profiling, reverse
engineering of proprietary software, and bug finding
for both kernel-mode and user-mode binaries. Building
these tools on top of S2E took less than 770 LOC and 40
person-hours each. S2E's novelty consists of its
ability to scale to large real systems, such as a full
Windows stack. S2E is based on two new ideas: selective
symbolic execution, a way to automatically minimize the
amount of code that has to be executed symbolically
given a target analysis, and relaxed execution
consistency models, a way to make principled
performance/accuracy trade-offs in complex analyses.
These techniques give S2E three key abilities: to
simultaneously analyze entire families of execution
paths, instead of just one execution at a time; to
perform the analyses in-vivo within a real software
stack--user programs, libraries, kernel, drivers,
etc.--instead of using abstract models of these layers;
and to operate directly on binaries, thus being able to
analyze even proprietary software. Conceptually, S2E is
an automated path explorer with modular path analyzers:
the explorer drives the target system down all
execution paths of interest, while analyzers check
properties of each such path (e.g., to look for bugs)
or simply collect information (e.g., count page
faults). Desired paths can be specified in multiple
ways, and S2E users can either combine existing
analyzers to build a custom analysis tool, or write new
analyzers using the S2E API.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Hofmann:2012:EOS,
author = "Owen S. Hofmann and Alan M. Dunn and Sangman Kim and
Indrajit Roy and Emmett Witchel",
title = "Ensuring operating system kernel integrity with
{OSck}",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "279--290",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950398",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Kernel rootkits that modify operating system state to
avoid detection are a dangerous threat to system
security. This paper presents OSck, a system that
discovers kernel rootkits by detecting malicious
modifications to operating system data. OSck integrates
and extends existing techniques for detecting rootkits,
and verifies safety properties for large portions of
the kernel heap with minimal overhead. We deduce type
information for verification by analyzing unmodified
kernel source code and in-memory kernel data
structures. High-performance integrity checks that
execute concurrently with a running operating system
create data races, and we demonstrate a deterministic
solution for ensuring kernel memory is in a consistent
state. We introduce two new classes of kernel rootkits
that are undetectable by current systems, motivating
the need for the OSck API that allows kernel developers
to conveniently specify arbitrary integrity
properties.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Porter:2012:RLT,
author = "Donald E. Porter and Silas Boyd-Wickizer and Jon
Howell and Reuben Olinsky and Galen C. Hunt",
title = "Rethinking the library {OS} from the top down",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "291--304",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950399",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper revisits an old approach to operating
system construction, the library OS, in a new context.
The idea of the library OS is that the personality of
the OS on which an application depends runs in the
address space of the application. A small, fixed set of
abstractions connects the library OS to the host OS
kernel, offering the promise of better system security
and more rapid independent evolution of OS components.
We describe a working prototype of a Windows 7 library
OS that runs the latest releases of major applications
such as Microsoft Excel, PowerPoint, and Internet
Explorer. We demonstrate that desktop sharing across
independent, securely isolated, library OS instances
can be achieved through the pragmatic reuse of
networking protocols. Each instance has significantly
lower overhead than a full VM bundled with an
application: a typical application adds just 16MB of
working set and 64MB of disk footprint. We contribute a
new ABI below the library OS that enables application
mobility. We also show that our library OS can address
many of the current uses of hardware virtual machines
at a fraction of the overheads. This paper describes
the first working prototype of a full commercial OS
redesigned as a library OS capable of running
significant applications. Our experience shows that the
long-promised benefits of the library OS approach
(better protection of system integrity and rapid system
evolution) are readily obtainable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Palix:2012:FLT,
author = "Nicolas Palix and Ga{\"e}l Thomas and Suman Saha and
Christophe Calv{\`e}s and Julia Lawall and Gilles
Muller",
title = "Faults in {Linux}: ten years later",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "305--318",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950401",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In 2001, Chou et al. published a study of faults found
by applying a static analyzer to Linux versions 1.0
through 2.4.1. A major result of their work was that
the drivers directory contained up to 7 times more of
certain kinds of faults than other directories. This
result inspired a number of development and research
efforts on improving the reliability of driver code.
Today Linux is used in a much wider range of
environments, provides a much wider range of services,
and has adopted a new development and release model.
What has been the impact of these changes on code
quality? Are drivers still a major problem? To answer
these questions, we have transported the experiments of
Chou et al. to Linux versions 2.6.0 to 2.6.33, released
between late 2003 and early 2010. We find that Linux
has more than doubled in size during this period, but
that the number of faults per line of code has been
decreasing. And, even though drivers still accounts for
a large part of the kernel code and contains the most
faults, its fault rate is now below that of other
directories, such as arch (HAL) and fs (file systems).
These results can guide further development and
research efforts. To enable others to continually
update these results as Linux evolves, we define our
experimental protocol and make our checkers and results
available in a public archive.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Esmaeilzadeh:2012:LBL,
author = "Hadi Esmaeilzadeh and Ting Cao and Xi Yang and Stephen
M. Blackburn and Kathryn S. McKinley",
title = "Looking back on the language and hardware revolutions:
measured power, performance, and scaling",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "319--332",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950402",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper reports and analyzes measured chip power
and performance on five process technology generations
executing 61 diverse benchmarks with a rigorous
methodology. We measure representative Intel IA32
processors with technologies ranging from 130nm to 32nm
while they execute sequential and parallel benchmarks
written in native and managed languages. During this
period, hardware and software changed substantially:
(1) hardware vendors delivered chip multiprocessors
instead of uniprocessors, and independently (2)
software developers increasingly chose managed
languages instead of native languages. This
quantitative data reveals the extent of some known and
previously unobserved hardware and software trends. Two
themes emerge. (I) Workload: The power, performance,
and energy trends of native workloads do not
approximate managed workloads. For example, (a) the
SPEC CPU2006 native benchmarks on the i7 (45nm) and i5
(32nm) draw significantly less power than managed or
scalable native benchmarks; and (b) managed runtimes
exploit parallelism even when running single-threaded
applications. The results recommend architects always
include native and managed workloads when designing and
evaluating energy efficient hardware. (II)
Architecture: Clock scaling, microarchitecture,
simultaneous multithreading, and chip multiprocessors
each elicit a huge variety of power, performance, and
energy responses. This variety and the difficulty of
obtaining power measurements recommend exposing
on-chip power meters and, when possible, structure-specific
power meters for cores, caches, and other
structures. Just as hardware event counters provide a
quantitative grounding for performance innovations,
power meters are necessary for optimizing energy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Nguyen:2012:SCS,
author = "Donald Nguyen and Keshav Pingali",
title = "Synthesizing concurrent schedulers for irregular
algorithms",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "333--344",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950404",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scheduling is the assignment of tasks or activities to
processors for execution, and it is an important
concern in parallel programming. Most prior work on
scheduling has focused either on static scheduling of
applications in which the dependence graph is known at
compile-time or on dynamic scheduling of independent
loop iterations such as in OpenMP. In irregular
algorithms, dependences between activities are complex
functions of runtime values so these algorithms are not
amenable to compile-time analysis nor do they consist
of independent activities. Moreover, the amount of work
can vary dramatically with the scheduling policy. To
handle these complexities, implementations of irregular
algorithms employ carefully handcrafted,
algorithm-specific schedulers but these schedulers are
themselves parallel programs, complicating the parallel
programming problem further. In this paper, we present
a flexible and efficient approach for specifying and
synthesizing scheduling policies for irregular
algorithms. We develop a simple compositional
specification language and show how it can concisely
encode scheduling policies in the literature. Then, we
show how to synthesize efficient parallel schedulers
from these specifications. We evaluate our approach for
five irregular algorithms on three multicore
architectures and show that (1) the performance of some
algorithms can improve by orders of magnitude with the
right scheduling policy, and (2) for the same policy,
the overheads of our synthesized schedulers are
comparable to those of fixed-function schedulers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
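The flavor of the specification language can be suggested by composing
two simple policies by hand, say "order by an integer level, then LIFO
within each level". The sequential sketch below (invented names) is
merely the semantics such a specification denotes; the paper's
contribution is synthesizing an efficient concurrent scheduler from it:

/* Hand-coded composition: an outer strict-priority policy over an
 * inner LIFO policy per level. */
#include <stdio.h>

#define LEVELS 3
#define CAP 8

static int stack[LEVELS][CAP];  /* one LIFO per level */
static int top[LEVELS];

static void push(int level, int task) { stack[level][top[level]++] = task; }

static int pop_next(void)
{
    for (int l = 0; l < LEVELS; l++)     /* outer policy: lowest level first */
        if (top[l] > 0)
            return stack[l][--top[l]];   /* inner policy: LIFO */
    return -1;
}

int main(void)
{
    push(1, 10); push(0, 20); push(1, 11); push(2, 30); push(0, 21);
    for (int t; (t = pop_next()) != -1; )
        printf("run task %d\n", t);
    return 0;
}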
@Article{Hoang:2012:ECT,
author = "Giang Hoang and Robby Bruce Findler and Russ Joseph",
title = "Exploring circuit timing-aware language and
compilation",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "345--356",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950405",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "By adjusting the design of the ISA and enabling
circuit timing-sensitive optimizations in a compiler,
we can more effectively exploit timing speculation.
While there has been growing interest in systems that
leverage circuit-level timing speculation to improve
the performance and power-efficiency of processors,
most of the innovation has been at the
microarchitectural level. We make the observation that
some code sequences place greater demand on circuit
timing deadlines than others. Furthermore, by
selectively replacing these codes with instruction
sequences which are semantically equivalent but reduce
activity on timing critical circuit paths, we can
trigger fewer timing errors and hence reduce recovery
costs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Farhad:2012:OAM,
author = "Sardar M. Farhad and Yousun Ko and Bernd Burgstaller
and Bernhard Scholz",
title = "Orchestration by approximation: mapping stream
programs onto multicore architectures",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "357--368",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950406",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a novel 2-approximation algorithm for
deploying stream graphs on multicore computers and a
stream graph transformation that eliminates
bottlenecks. The key technical insight is a data rate
transfer model that enables the computation of a
``closed form'', i.e., the data rate transfer function
of an actor depending on the arrival rate of the stream
program. A combinatorial optimization problem uses the
closed form to maximize the throughput of the stream
program. Although the problem is inherently NP-hard, we
present an efficient and effective 2-approximation
algorithm that provides a lower bound on the quality of
the solution. We introduce a transformation that uses
the closed form to identify and eliminate bottlenecks.
                 We show experimentally that (1) state-of-the-art
                 integer linear programming approaches for
                 orchestrating stream graphs are intractable, or at
                 least impractical, for larger stream graphs and
                 larger numbers of processors, and (2) our
                 2-approximation algorithm is highly efficient and its
                 results are close to the optimal solution for a
                 standard set of StreamIt benchmark programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Zhang:2012:FED,
author = "Eddy Z. Zhang and Yunlian Jiang and Ziyu Guo and Kai
Tian and Xipeng Shen",
title = "On-the-fly elimination of dynamic irregularities for
{GPU} computing",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "369--380",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950408",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The power-efficient massively parallel Graphics
Processing Units (GPUs) have become increasingly
influential for general-purpose computing over the past
few years. However, their efficiency is sensitive to
dynamic irregular memory references and control flows
in an application. Experiments have shown great
performance gains when these irregularities are
removed. But it remains an open question how to achieve
those gains through software approaches on modern GPUs.
This paper presents a systematic exploration to tackle
dynamic irregularities in both control flows and memory
references. It reveals some properties of dynamic
irregularities in both control flows and memory
references, their interactions, and their relations
with program data and threads. It describes several
heuristics-based algorithms and runtime adaptation
techniques for effectively removing dynamic
irregularities through data reordering and job
swapping. It presents a framework, G-Streamline, as a
unified software solution to dynamic irregularities in
GPU computing. G-Streamline has several distinctive
properties. It is a pure software solution and works on
the fly, requiring no hardware extensions or offline
profiling. It treats both types of irregularities at
the same time in a holistic fashion, maximizing the
whole-program performance by resolving conflicts among
                 optimizations. Its optimization overhead is largely
                 transparent to GPU kernel executions and thus does
                 not jeopardize the basic efficiency of the GPU
                 application. Finally, it is
robust to the presence of various complexities in GPU
applications. Experiments show that G-Streamline is
effective in reducing dynamic irregularities in GPU
computing, producing speedups between 1.07 and 2.5 for
a variety of applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
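
A CPU-side caricature of the data-reordering transformation described
above, with invented names and sizes: a one-off pass turns irregular
references data[idx[i]] into contiguous reads of a remapped copy,
which is what makes GPU accesses coalesced. G-Streamline's runtime
adaptation and overhead hiding are not modeled here.

/* Sketch of data reordering: thread i originally reads data[idx[i]]
 * with an irregular idx; after the reordering pass it reads remap[i]
 * contiguously.  The copy's cost is what the paper hides behind
 * kernel execution. */
#include <stdio.h>

#define N 8

int main(void)
{
    int data[N] = {10, 11, 12, 13, 14, 15, 16, 17};
    int idx[N]  = {7, 0, 3, 1, 6, 2, 5, 4};   /* irregular references */
    int remap[N];

    for (int i = 0; i < N; i++)    /* one-off reordering pass */
        remap[i] = data[idx[i]];

    /* "Kernel": thread i now reads remap[i], a contiguous access. */
    for (int i = 0; i < N; i++)
        printf("thread %d reads %d\n", i, remap[i]);
    return 0;
}
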
@Article{Hormati:2012:SPS,
author = "Amir H. Hormati and Mehrzad Samadi and Mark Woh and
Trevor Mudge and Scott Mahlke",
title = "{Sponge}: portable stream programming on graphics
engines",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "381--392",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950409",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graphics processing units (GPUs) provide a low cost
platform for accelerating high performance
computations. The introduction of new programming
languages, such as CUDA and OpenCL, makes GPU
programming attractive to a wide variety of
programmers. However, programming GPUs is still a
cumbersome task for two primary reasons: tedious
performance optimizations and lack of portability.
First, optimizing an algorithm for a specific GPU is a
time-consuming task that requires a thorough
understanding of both the algorithm and the underlying
hardware. Unoptimized CUDA programs typically only
achieve a small fraction of the peak GPU performance.
Second, GPU code lacks efficient portability as code
written for one GPU can be inefficient when executed on
another. Moving code from one GPU to another while
maintaining the desired performance is a non-trivial
task often requiring significant modifications to
account for the hardware differences. In this work, we
propose Sponge, a compilation framework for GPUs using
synchronous data flow streaming languages. Sponge is
capable of performing a wide variety of optimizations
to generate efficient code for graphics engines. Sponge
alleviates the problems associated with current GPU
programming methods by providing portability across
different generations of GPUs and CPUs, and a better
abstraction of the hardware details, such as the memory
hierarchy and threading model. Using streaming, we
provide a write-once software paradigm and rely on the
compiler to automatically create optimized CUDA code
for a wide variety of GPU targets. Sponge's compiler
optimizations improve the performance of the baseline
CUDA implementations by an average of 3.2x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Kamruzzaman:2012:ICP,
author = "Md Kamruzzaman and Steven Swanson and Dean M.
Tullsen",
title = "Inter-core prefetching for multicore processors using
migrating helper threads",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "393--404",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950411",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multicore processors have become ubiquitous in today's
systems, but exploiting the parallelism they offer
                 remains difficult, especially for legacy applications
                 and applications with large serial components. The
challenge, then, is to develop techniques that allow
multiple cores to work in concert to accelerate a
single thread. This paper describes inter-core
prefetching, a technique to exploit multiple cores to
accelerate a single thread. Inter-core prefetching
extends existing work on helper threads for SMT
machines to multicore machines. Inter-core prefetching
uses one compute thread and one or more prefetching
threads. The prefetching threads execute on cores that
would otherwise be idle, prefetching the data that the
compute thread will need. The compute thread then
migrates between cores, following the path of the
prefetch threads, and finds the data already waiting
for it. Inter-core prefetching works with existing
hardware and existing instruction set architectures.
Using a range of state-of-the-art multiprocessors, this
paper characterizes the potential benefits of the
technique with microbenchmarks and then measures its
impact on a range of memory intensive applications. The
results show that inter-core prefetching improves
performance by an average of 31 to 63\%, depending on
the architecture, and speeds up some applications by as
much as 2.8$ \times $. It also demonstrates that
inter-core prefetching reduces energy consumption by
between 11 and 26\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
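
The pipelining at the heart of inter-core prefetching can be sketched
with plain pthreads, though only loosely: in the real technique the
compute thread migrates between cores to meet the prefetched data,
whereas the invented sketch below merely has a helper thread touch
chunk i+1 while the compute thread works on chunk i (compile with
cc -O2 -pthread).

/* Simplified helper-thread prefetching: the prefetcher warms the next
 * chunk's cache lines while the compute thread sums the current one;
 * a barrier synchronizes the two per chunk boundary. */
#include <pthread.h>
#include <stdio.h>

#define N (1 << 22)
#define CHUNK (1 << 18)

static int data[N];
static pthread_barrier_t step;   /* one rendezvous per chunk boundary */

static void *prefetcher(void *arg)
{
    (void)arg;
    volatile int sink = 0;
    for (long c = 0; c * CHUNK < N; c++) {
        long next = (c + 1) * CHUNK;
        for (long i = next; i < next + CHUNK && i < N; i += 16)
            sink += data[i];          /* touch one int per cache line */
        pthread_barrier_wait(&step);
    }
    return NULL;
}

int main(void)
{
    for (long i = 0; i < N; i++) data[i] = (int)(i & 0xff);
    pthread_barrier_init(&step, NULL, 2);

    pthread_t pf;
    pthread_create(&pf, NULL, prefetcher, NULL);

    long long sum = 0;
    for (long c = 0; c * CHUNK < N; c++) {
        for (long i = c * CHUNK; i < (c + 1) * CHUNK && i < N; i++)
            sum += data[i];           /* compute on the current chunk */
        pthread_barrier_wait(&step);  /* hand off to the next chunk   */
    }
    pthread_join(pf, NULL);
    printf("sum = %lld\n", sum);
    pthread_barrier_destroy(&step);
    return 0;
}
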
@Article{Hayashizaki:2012:IPT,
author = "Hiroshige Hayashizaki and Peng Wu and Hiroshi Inoue
and Mauricio J. Serrano and Toshio Nakatani",
title = "Improving the performance of trace-based systems by
false loop filtering",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "405--418",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950412",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Trace-based compilation is a promising technique for
language compilers and binary translators. It offers
the potential to expand the compilation scopes that
have traditionally been limited by method boundaries.
Detecting repeating cyclic execution paths and
capturing the detected repetitions into traces is a key
requirement for trace selection algorithms to achieve
good optimization and performance with small amounts of
code. One important class of repetition detection is
cyclic-path-based repetition detection, where a cyclic
execution path (a path that starts and ends at the same
instruction address) is detected as a repeating cyclic
execution path. However, we found many cyclic paths
that are not repeating cyclic execution paths, which we
call false loops. A common class of false loops occurs
when a method is invoked from multiple call-sites. A
cycle is formed between two invocations of the method
from different call-sites, but which does not represent
loops or recursion. False loops can result in shorter
traces and smaller compilation scopes, and degrade the
performance. We propose false loop filtering, an
approach to reject false loops in the repetition
detection step of trace selection, and a technique
called false loop filtering by call-stack-comparison,
which rejects a cyclic path as a false loop if the call
stacks at the beginning and the end of the cycle are
different. We applied false loop filtering to our
trace-based Java\TM{} JIT compiler that is based on
IBM's J9 JVM. We found that false loop filtering
achieved an average improvement of 16\% and 10\% for
the DaCapo benchmark when applied to two baseline trace
selection algorithms, respectively, with up to 37\%
improvement for individual benchmarks. In the end, with
false loop filtering, our trace-based JIT achieves a
performance comparable to that of the method-based J9
JVM/JIT using the corresponding optimization level.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '12 conference proceedings.",
}
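
The call-stack-comparison filter above admits a compact illustration.
In this invented C sketch, a candidate cycle is accepted as a loop
only if the call stacks recorded at its start and end coincide; real
trace recorders are, of course, far richer.

/* False loop filtering by call-stack comparison: a cyclic path (one
 * returning to the same instruction address) is rejected as a false
 * loop if the start and end call stacks differ. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MAX_DEPTH 64

typedef struct {
    const char *frames[MAX_DEPTH];  /* call-site IDs, innermost last */
    int depth;
} callstack_t;

static bool same_stack(const callstack_t *a, const callstack_t *b)
{
    if (a->depth != b->depth)
        return false;
    for (int i = 0; i < a->depth; i++)
        if (strcmp(a->frames[i], b->frames[i]) != 0)
            return false;
    return true;
}

int main(void)
{
    /* A method invoked from two different call-sites: the cyclic path
     * through its entry address is a false loop, exposed by the
     * differing stacks. */
    callstack_t at_cycle_start = { { "main", "callsiteA" }, 2 };
    callstack_t at_cycle_end   = { { "main", "callsiteB" }, 2 };

    printf(same_stack(&at_cycle_start, &at_cycle_end)
               ? "true loop: keep trace\n"
               : "false loop: reject cycle\n");
    return 0;
}
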
@Article{Xue:2012:RJC,
author = "Jingling Xue",
title = "Rethinking {Java} call stack design for tiny embedded
devices",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "1--10",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248420",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "The ability of tiny embedded devices to run large
feature-rich programs is typically constrained by the
amount of memory installed on such devices.
Furthermore, the useful operation of these devices in
wireless sensor applications is limited by their
battery life. This paper presents a call stack redesign
targeted at an efficient use of RAM storage and CPU
cycles by a Java program running on a wireless sensor
mote. Without compromising the application programs,
our call stack redesign saves 30\% of RAM, on average,
evaluated over a large number of benchmarks. On the
                 same set of benchmarks, our design also avoids
frequent RAM allocations and deallocations, resulting
in average 80\% fewer memory operations and 23\% faster
program execution. These may be critical improvements
                 for tiny embedded devices that are equipped with a
                 small amount of RAM and limited battery life.
                 However, our
call stack redesign is equally effective for any
complex multi-threaded object oriented program
developed for desktop computers. We describe the
redesign, measure its performance and report the
resulting savings in RAM and execution time for a wide
variety of programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sallenave:2012:LGE,
author = "Olivier Sallenave and Roland Ducournau",
title = "Lightweight generics in embedded systems through
static analysis",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "11--20",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248421",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "Low-end embedded systems are still programmed in C and
assembly, and adopting high-level languages such as C\#
should reduce the length of their development cycles.
For these systems, code size is a major concern, but
run-time efficiency should also be reasonable ---
programmers will not migrate to C\# unless the overhead
compared with C is insignificant. In this paper, we
propose a static approach based on whole program
optimization for implementing {.NET} generics in such
systems. Indeed, the implementation of run-time
generics involves a tradeoff between size and run-time
efficiency. In this proposal, generic instances are
detected through a generalization of RTA to parametric
polymorphism. Also, we propose an implementation scheme
which employs code sharing and more effective coercions
than boxing. Unlike existing implementation schemes, it
is scalable in the number of generic instances without
involving boxing and unboxing in a systematic way.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kyle:2012:EPI,
author = "Stephen Kyle and Igor B{\"o}hm and Bj{\"o}rn Franke
and Hugh Leather and Nigel Topham",
title = "Efficiently parallelizing instruction set simulation
of embedded multi-core processors using region-based
just-in-time dynamic binary translation",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "21--30",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248422",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "Embedded systems, as typified by modern mobile phones,
are already seeing a drive toward using multi-core
processors. The number of cores will likely increase
rapidly in the future. Engineers and researchers need
to be able to simulate systems, as they are expected to
                 be in a few generations' time, running simulations of
many-core devices on today's multi-core machines. These
requirements place heavy demands on the scalability of
simulation engines, the fastest of which have typically
                 evolved from just-in-time (JIT) dynamic binary
                 translators (DBT). Existing work aimed at
                 parallelizing DBT simulators has focused exclusively
                 on trace-based DBT, wherein linear execution traces
                 or perhaps trees thereof are the units of
                 translation. Region-based DBT
simulators have not received the same attention and
require different techniques than their trace-based
cousins. In this paper we develop an innovative
approach to scaling multi-core, embedded simulation
                 through region-based DBT. We initially modify the JIT
code generator of such a simulator to emit code that
does not depend on a particular thread with its
thread-specific context and is, therefore,
thread-agnostic. We then demonstrate that this
thread-agnostic code generation is comparable to
thread-specific code with respect to performance, but
also enables the sharing of JIT-compiled regions
between different threads. This sharing optimisation,
in turn, leads to significant performance improvements
for multi-threaded applications. In fact, our results
confirm that an average of 76\% of all JIT-compiled
regions can be shared between 128 threads in
representative, parallel workloads. We demonstrate that
this translates into an overall performance improvement
by 1.44x on average and up to 2.40x across 12
multi-threaded benchmarks taken from the Splash-2
benchmark suite, targeting our high-performance
                 multi-core DBT simulator for embedded ARC processors
running on a 4-core Intel host machine.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
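
The thread-agnostic code-generation idea can be caricatured in C:
instead of baking one simulated core's state into a JIT-compiled
region, the region takes that state as an explicit parameter and
becomes shareable between threads. The structures below are invented
for illustration.

/* A "compiled region" as a pure function of the context it is handed,
 * so one region serves any number of simulated cores. */
#include <stdio.h>

typedef struct {
    unsigned pc;
    unsigned regs[4];
} cpu_context_t;

static void region_42(cpu_context_t *ctx)
{
    ctx->regs[0] += ctx->regs[1];   /* e.g. translated ADD r0, r0, r1 */
    ctx->pc += 4;
}

int main(void)
{
    cpu_context_t core0 = { 0x42, {1, 2, 0, 0} };
    cpu_context_t core1 = { 0x42, {10, 20, 0, 0} };

    region_42(&core0);   /* same shared region,        */
    region_42(&core1);   /* different simulated cores  */
    printf("core0: r0=%u pc=%u\n", core0.regs[0], core0.pc);
    printf("core1: r0=%u pc=%u\n", core1.regs[0], core1.pc);
    return 0;
}
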
@Article{Huang:2012:WAR,
author = "Yazhi Huang and Mengying Zhao and Chun Jason Xue",
title = "{WCET}-aware re-scheduling register allocation for
real-time embedded systems with clustered {VLIW}
architecture",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "31--40",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248424",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "Worst-Case Execution Time (WCET) is one of the most
important metrics in real-time embedded system design.
For embedded systems with clustered VLIW architecture,
register allocation, instruction scheduling, and
cluster assignment are three key activities to pursue
code optimization which have profound impact on WCET.
At the same time, these three activities exhibit a
phase ordering problem: Independently performing
register allocation, scheduling and cluster assignment
could have a negative effect on the other phases,
thereby generating sub-optimal compiled codes. In this
paper, a compiler level optimization, namely WCET-aware
Re-scheduling Register Allocation (WRRA), is proposed
to achieve WCET minimization for real-time embedded
systems with clustered VLIW architecture. The novelty
of the proposed approach is that the effects of
register allocation, instruction scheduling and cluster
assignment on the quality of generated code are taken
into account for WCET minimization. These three
compilation processes are integrated into a single
phase to obtain a balanced result. The proposed
technique is implemented in Trimaran 4.0. The
experimental results show that the proposed technique
can reduce WCET effectively, by 33\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wan:2012:WAD,
author = "Qing Wan and Hui Wu and Jingling Xue",
title = "{WCET}-aware data selection and allocation for
scratchpad memory",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "41--50",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248425",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "In embedded systems, SPM (scratchpad memory) is an
attractive alternative to cache memory due to its lower
energy consumption and higher predictability of program
execution. This paper studies the problem of placing
variables of a program into an SPM such that its WCET
(worst-case execution time) is minimized. We propose an
efficient dynamic approach that comprises two novel
heuristics. The first heuristic iteratively selects a
most beneficial variable as an SPM resident candidate
based on its impact on the k longest paths of the
program. The second heuristic incrementally allocates
each SPM resident candidate to the SPM based on graph
coloring and acyclic graph orientation. We have
evaluated our approach by comparing with an ILP-based
approach and a longest-path-based greedy approach using
the eight benchmarks selected from Powerstone and
M{\"a}lardalen WCET Benchmark suites under three
different SPM configurations. Our approach achieves up
to 21\% and 43\% improvements in WCET reduction over
the ILP-based approach and the greedy approach,
respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gerard:2012:MMO,
author = "L{\'e}onard G{\'e}rard and Adrien Guatto and
C{\'e}dric Pasteur and Marc Pouzet",
title = "A modular memory optimization for synchronous
data-flow languages: application to arrays in a
{Lustre} compiler",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "51--60",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248426",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "The generation of efficient sequential code for
synchronous data-flow languages raises two intertwined
issues: control and memory optimization. While the
former has been extensively studied, for instance in
the compilation of Lustre and Signal, the latter has
only been addressed in a restricted manner. Yet, memory
optimization becomes a pressing issue when arrays are
added to such languages. This article presents a
two-level solution to the memory optimization problem.
It combines a compile-time optimization algorithm,
reminiscent of register allocation, paired with
language annotations on the source given by the
designer. Annotations express in-place modifications
and control where allocation is performed. Moreover,
they allow external functions performing in-place
modifications to be safely imported. Soundness of
annotations is guaranteed by a semilinear type system
and additional scheduling constraints. A key feature is
that annotations for well-typed programs do not change
the semantics of the language: removing them may lead
to less efficient code but will not alter the
semantics. The method has been implemented in a new
compiler for a LUSTRE-like synchronous language
extended with hierarchical automata and arrays.
Experiments show that the proposed approach removes
most of the unnecessary array copies, resulting in
faster code that uses less memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sbirlea:2012:MDF,
author = "Alina Sb{\^\i}rlea and Yi Zou and Zoran Budiml{\'\i}c
and Jason Cong and Vivek Sarkar",
title = "Mapping a data-flow programming model onto
heterogeneous platforms",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "61--70",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248428",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "In this paper we explore mapping of a high-level macro
data-flow programming model called Concurrent
Collections (CnC) onto heterogeneous platforms in order
to achieve high performance and low energy consumption
while preserving the ease of use of data-flow
programming. Modern computing platforms are becoming
increasingly heterogeneous in order to improve energy
efficiency. This trend is clearly seen across a diverse
spectrum of platforms, from small-scale embedded SOCs
to large-scale super-computers. However, programming
these heterogeneous platforms poses a serious challenge
for application developers. We have designed a software
flow for converting high-level CnC programs to the
Habanero-C language. CnC programs have a clear
separation between the application description, the
implementation of each of the application components
and the abstraction of hardware platform, making it an
excellent programming model for domain experts. Domain
experts can later employ the help of a tuning expert
(either a compiler or a person) to tune their
applications with minimal effort. We also extend the
Habanero-C runtime system to support work-stealing
across heterogeneous computing devices and introduce
task affinity for these heterogeneous components to
allow users to fine tune the runtime scheduling
decisions. We demonstrate a working example that maps a
pipeline of medical image-processing algorithms onto a
prototype heterogeneous platform that includes CPUs,
GPUs and FPGAs. For the medical imaging domain, where
obtaining fast and accurate results is a critical step
in diagnosis and treatment of patients, we show that
our model offers up to 17.72X speedup and an estimated
usage of 0.52X of the power used by CPUs alone, when
using accelerators (GPUs and FPGAs) and CPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hashemi:2012:FSU,
author = "Matin Hashemi and Mohammad H. Foroozannejad and Soheil
Ghiasi and Christoph Etzel",
title = "{FORMLESS}: scalable utilization of embedded manycores
in streaming applications",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "71--78",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248429",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "Variants of dataflow specification models are widely
used to synthesize streaming applications for
distributed-memory parallel processors. We argue that
current practice of specifying streaming applications
using rigid dataflow models, implicitly prohibits a
number of platform oriented optimizations and hence
limits portability and scalability with respect to
number of processors. We motivate
                 Functionally-cOnsistent stRucturally-MalLEable Streaming
Specification, dubbed FORMLESS, which refers to raising
the abstraction level beyond fixed-structure dataflow
to address its portability and scalability limitations.
To demonstrate the potential of the idea, we develop a
design space exploration scheme to customize the
application specification to better fit the target
platform. Experiments with several common streaming
case studies demonstrate improved portability and
scalability over conventional dataflow specification
models, and confirm the effectiveness of our
approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Farhad:2012:PGD,
author = "S. M. Farhad and Yousun Ko and Bernd Burgstaller and
Bernhard Scholz",
title = "Profile-guided deployment of stream programs on
multicores",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "79--88",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248430",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "Because multicore architectures have become the
industry standard, programming abstractions for
concurrent programming are of key importance. Stream
programming languages facilitate application domains
characterized by regular sequences of data, such as
multimedia, graphics, signal processing and networking.
With stream programs, computations are expressed
through independent actors that interact through FIFO
data channels. A major challenge with stream programs
is to load-balance actors among available processing
cores. The workload of a stream program is determined
by actor execution times and the communication overhead
induced by data channels. Estimating communication
costs on cache-coherent shared-memory multiprocessors
is difficult, because data movements are abstracted
away by the cache coherence protocol. Standard
execution time profiling techniques cannot separate
actor execution times from communication costs, because
communication costs manifest in terms of execution time
overhead. In this work we present a unified Integer
Linear Programming (ILP) formulation that balances the
workload of stream programs on cache-coherent multicore
architectures. For estimating the communication costs
of data channels, we devise a novel profiling scheme
that minimizes the number of profiling steps. We
conduct experiments across a range of StreamIt
benchmarks and show that our method achieves a speedup
of up to 4.02x on 6 processors. The number of profiling
steps is on average only 17\% of an exhaustive
profiling run over all data channels of a stream
program.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Fang:2012:IDP,
author = "Zhenman Fang and Jiaxin Li and Weihua Zhang and Yi Li
and Haibo Chen and Binyu Zang",
title = "Improving dynamic prediction accuracy through
multi-level phase analysis",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "89--98",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248432",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "Phase analysis, which classifies the set of execution
intervals with similar execution behavior and resource
requirements, has been widely used in a variety of
dynamic systems, including dynamic cache
reconfiguration, prefetching and race detection. While
phase granularity has been a major factor to the
accuracy of phase prediction, it has not been well
investigated yet and most dynamic systems usually adopt
a fine-grained prediction scheme. However, such a
scheme can only take account of recent local phase
information and could be frequently interfered by
temporary noises due to instant phase changes, which
might notably limit the prediction accuracy. In this
paper, we make the first investigation on the potential
of multi-level phase analysis (MLPA), where different
granularity phase analysis are combined together to
improve the overall accuracy. The key observation is
that a coarse-grained interval, which usually consists
of stably-distributed fine-grained intervals, can be
accurately identified based on the fine-grained
intervals at the beginning of its execution. Based on
the observation, we design and implement a MLPA scheme.
In such a scheme, a coarse-grained phase is first
identified based on the fine-grained intervals at the
beginning of its execution. The following fine-grained
phases in it are then predicted based on the sequence
of fine-grained phases in the coarse-grained phase.
Experimental results show such a scheme can notably
                 improve the prediction accuracy. Using a Markov
                 fine-grained phase predictor as the baseline, MLPA
                 can improve prediction accuracy by 20\%, 39\%, and
                 29\% for next-phase, phase-change, and phase-length
                 prediction on SPEC2000, respectively, yet incurs
                 only about 2\% time
overhead and 40\% space overhead (about 360 bytes in
total). To demonstrate the effectiveness of MLPA, we
apply it to a dynamic cache reconfiguration system
which dynamically adjusts the cache size to reduce the
power consumption and access time of data cache.
Experimental results show that MLPA can further reduce
the average cache size by 15\% compared to the
fine-grained scheme.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
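
A toy rendition of the two-level scheme above, with invented tables
and IDs: the first few fine-grained interval IDs identify the
enclosing coarse-grained phase, whose stored sequence then predicts
the remaining fine-grained phases.

/* Multi-level phase analysis, minimally: match the prefix of fine
 * phases seen at coarse-phase entry against known coarse phases, then
 * replay the stored sequence as the prediction. */
#include <stdio.h>
#include <string.h>

#define PREFIX 2
#define LEN    6

typedef struct {
    int prefix[PREFIX];  /* fine phases at coarse-phase entry         */
    int seq[LEN];        /* full fine-phase sequence of this c. phase */
} coarse_phase_t;

static const coarse_phase_t table[] = {
    { {1, 1}, {1, 1, 2, 2, 3, 3} },   /* coarse phase A */
    { {4, 5}, {4, 5, 4, 5, 4, 5} },   /* coarse phase B */
};

static const coarse_phase_t *identify(const int *seen)
{
    for (size_t i = 0; i < sizeof table / sizeof table[0]; i++)
        if (memcmp(table[i].prefix, seen, sizeof table[i].prefix) == 0)
            return &table[i];
    return NULL;
}

int main(void)
{
    int observed[PREFIX] = {4, 5};   /* start of a new coarse interval */
    const coarse_phase_t *cp = identify(observed);

    if (cp) {
        printf("predicted fine phases:");
        for (int i = PREFIX; i < LEN; i++)
            printf(" %d", cp->seq[i]);
        printf("\n");
    }
    return 0;
}
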
@Article{Khudia:2012:ESE,
author = "Daya Shanker Khudia and Griffin Wright and Scott
Mahlke",
title = "Efficient soft error protection for commodity embedded
microprocessors using profile information",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "99--108",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248433",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "Successive generations of processors use smaller
transistors in the quest to make more powerful
computing systems. It has been previously studied that
smaller transistors make processors more susceptible to
soft errors (transient faults caused by high energy
particle strikes). Such errors can result in unexpected
behavior and incorrect results. With smaller and
cheaper transistors becoming pervasive in mainstream
computing, it is necessary to protect these devices
against soft errors; an increasing rate of faults
necessitates the protection of applications running on
commodity processors against soft errors. The existing
methods of protecting against such faults generally
have high area or performance overheads and thus are
not directly applicable in the embedded design space.
In order to protect against soft errors, the detection
of these errors is a necessary first step so that a
recovery can be triggered. To solve the problem of
detecting soft errors cheaply, we propose a
profiling-based software-only application analysis and
transformation solution. The goal is to develop a low
cost solution which can be deployed for off-the-shelf
embedded processors. The solution works by
intelligently duplicating instructions that are likely
to affect the program output, and comparing results
between original and duplicated instructions. The
intelligence of our solution is garnered through the
use of control flow, memory dependence, and value
profiling to understand and exploit the common-case
behavior of applications. Our solution is able to
achieve 92\% fault coverage with a 20\% instruction
overhead. This represents a 41\% lower performance
overhead than the best prior approaches with
approximately the same fault coverage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
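
A source-level analogue of the duplicate-and-compare pattern described
above. The paper duplicates instructions inside the compiler, guided
by control-flow, memory-dependence, and value profiles; the invented
sketch below shows only the checking pattern, not that selection
logic.

/* Selective duplication, caricatured: the value feeding program
 * output is computed twice and compared before use; a mismatch
 * signals a transient fault and triggers recovery. */
#include <stdio.h>
#include <stdlib.h>

static int critical(int x) { return 3 * x + 7; }

int main(void)
{
    volatile int x = 5;     /* volatile so the duplicate is not merged */
    int v  = critical(x);   /* original instruction sequence */
    int v2 = critical(x);   /* duplicated (shadow) sequence  */

    if (v != v2) {          /* checker inserted before output */
        fprintf(stderr, "soft error detected: trigger recovery\n");
        abort();
    }
    printf("result = %d\n", v);
    return 0;
}
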
@Article{Li:2012:CAP,
author = "Qingan Li and Mengying Zhao and Chun Jason Xue and
Yanxiang He",
title = "Compiler-assisted preferred caching for embedded
systems with {STT--RAM} based hybrid cache",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "109--118",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248434",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "As technology scales down, energy consumption is
becoming a big problem for traditional SRAM-based cache
hierarchies. The emerging Spin-Torque Transfer RAM
(STT-RAM) is a promising replacement for large on-chip
cache due to its ultra low leakage power and high
storage density. However, write operations on STT-RAM
suffer from considerably higher energy consumption and
longer latency than SRAM. Hybrid cache consisting of
both SRAM and STT-RAM has been proposed recently for
both performance and energy efficiency. Most management
strategies for hybrid caches employ migration-based
techniques to dynamically move write-intensive data
from STT-RAM to SRAM. These techniques lead to extra
overheads. In this paper, we propose a
compiler-assisted approach, preferred caching, to
significantly reduce the migration overhead by giving
                 migration-intensive memory blocks preference for
the SRAM part of the hybrid cache. Furthermore, a data
assignment technique is proposed to improve the
efficiency of preferred caching. The reduction of
migration overhead can in turn improve the performance
and energy efficiency of STT-RAM based hybrid cache.
The experimental results show that, with the proposed
techniques, on average, the number of migrations is
reduced by 21.3\%, the total latency is reduced by
8.0\% and the total dynamic energy is reduced by
10.8\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zuluaga:2012:SDS,
author = "Marcela Zuluaga and Andreas Krause and Peter Milder
and Markus P{\"u}schel",
title = "``Smart'' design space sampling to predict
{Pareto}-optimal solutions",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "119--128",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248436",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "Many high-level synthesis tools offer degrees of
freedom in mapping high-level specifications to
Register-Transfer Level descriptions. These choices do
not affect the functional behavior but span a design
space of different cost-performance tradeoffs. In this
paper we present a novel machine learning-based
approach that efficiently determines the Pareto-optimal
designs while only sampling and synthesizing a fraction
of the design space. The approach combines three key
components: (1) A regression model based on Gaussian
processes to predict area and throughput based on
synthesis training data. (2) A ``smart'' sampling
strategy, GP-PUCB, to iteratively refine the model by
carefully selecting the next design to synthesize to
maximize progress. (3) A stopping criterion based on
assessing the accuracy of the model without access to
complete synthesis data. We demonstrate the
effectiveness of our approach using IP generators for
discrete Fourier transforms and sorting networks.
However, our algorithm is not specific to this
application and can be applied to a wide range of
Pareto front prediction problems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
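
For orientation, the small invented C sketch below spells out only the
optimization target, Pareto dominance over (area, throughput) design
points; the Gaussian-process regression model and the GP-PUCB sampling
strategy themselves are not sketched.

/* A design point is Pareto-optimal if no other point dominates it:
 * no worse in both objectives (area minimized, throughput maximized)
 * and strictly better in at least one. */
#include <stdbool.h>
#include <stdio.h>

typedef struct { double area; double throughput; } design_t;

static bool dominates(design_t a, design_t b)   /* does a dominate b? */
{
    bool no_worse = a.area <= b.area && a.throughput >= b.throughput;
    bool better   = a.area <  b.area || a.throughput >  b.throughput;
    return no_worse && better;
}

int main(void)
{
    design_t pts[] = { {1.0, 5.0}, {2.0, 5.0}, {2.5, 9.0} };
    int n = 3;

    for (int i = 0; i < n; i++) {
        bool optimal = true;
        for (int j = 0; j < n; j++)
            if (j != i && dominates(pts[j], pts[i]))
                optimal = false;
        printf("design %d: %s\n", i, optimal ? "Pareto-optimal"
                                             : "dominated");
    }
    return 0;
}
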
@Article{Bouissou:2012:OSS,
author = "Olivier Bouissou and Alexandre Chapoutot",
title = "An operational semantics for {Simulink}'s simulation
engine",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "129--138",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248437",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "The industrial tool Matlab/Simulink is widely used in
the design of embedded systems. The main feature of
this tool is its ability to model in a common formalism
the software and its physical environment. This makes
it very useful for validating the design of embedded
software using numerical simulation. However, the
formal verification of such models is still problematic
as Simulink is a programming language for which no
formal semantics exists. In this article, we present an
operational semantics of a representative subset of
Simulink which includes both continuous-time and
discrete-time blocks. We believe that this work gives a
better understanding of Simulink and it defines the
foundations of a general framework to apply formal
methods on Simulink's high level descriptions of
embedded systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yu:2012:SCC,
author = "Fang Yu and Shun-Ching Yang and Farn Wang and
Guan-Cheng Chen and Che-Chang Chan",
title = "Symbolic consistency checking of {OpenMP} parallel
programs",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "139--148",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248438",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "We present a symbolic approach for checking
consistency of OpenMP parallel programs. A parallel
program is consistent if it yields the same result as
its sequential version despite the execution order
among threads. We find race conditions of an OpenMP
parallel program, construct the formal model of its
raced segments under relaxed memory models, and perform
                 guided symbolic simulation to search for consistency
violations. The simulation terminates when (1) a
witness has been found (the program is inconsistent),
or (2) all reachable states have been explored (the
program is consistent). We have developed the tool
                 Pathg by incorporating the Omega library to solve
                 race constraints and the Red symbolic simulator to
                 perform
guided search. We show that Pathg can prove consistency
of programs, identify races that modern OpenMP checkers
failed to report, and find inconsistency witnesses
effectively against benchmarks from the OpenMP Source
Code Repository and the NAS Parallel benchmark suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
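
A minimal instance of the property being checked, written by us rather
than drawn from the paper's benchmarks: the OpenMP fragment below is
inconsistent because the unsynchronized update of sum races, so a
parallel run may differ from the sequential one; adding
reduction(+:sum) to the pragma restores consistency (compile with
cc -O2 -fopenmp).

/* Inconsistent OpenMP program: the read-modify-write of `sum` races
 * across threads, so the parallel result can diverge from the
 * sequential answer. */
#include <stdio.h>

int main(void)
{
    long sum = 0;

    #pragma omp parallel for        /* racy: no reduction clause */
    for (int i = 0; i < 1000000; i++)
        sum += i;                   /* read-modify-write race    */

    printf("sum = %ld (sequential answer: 499999500000)\n", sum);
    return 0;
}
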
@Article{Gal-On:2012:CPR,
author = "Shay Gal-On and Markus Levy",
title = "Creating portable, repeatable, realistic benchmarks
for embedded systems and the challenges thereof",
journal = j-SIGPLAN,
volume = "47",
number = "5",
pages = "149--152",
month = may,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345141.2248440",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:46 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "LCTES '12 proceedings.",
abstract = "To appreciate the challenges of analysing embedded
processor behaviour, step back in time to understand
the evolution of embedded processors. Only a few
decades ago, embedded processors were relatively simple
devices (compared to today), represented by a host of
8- and 16-bit microcontrollers, and 32-bit
                 microprocessors, with minimal integration. Today,
                 these processors (even the so-called low-end
                 microcontrollers) have evolved into highly integrated
SoCs with a wide variety of architectures capable of
tackling both specific and general-purpose tasks.
Associated with these transformations, the benchmarks
used to quantify the capabilities have also grown in
complexity and range. At the simplest level, benchmarks
such as CoreMark analyse the fundamental processor
cores. At the other end of the spectrum, system
                 benchmarks, such as BrowsingBench, analyse the entire SoC
as well as the system software stack and even the
physical interfaces. This paper examines some of the
challenges of applying such benchmarks, and explains
the methodologies used at EEMBC to manage portability,
repeatability, and realism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hosking:2012:CHL,
author = "Tony Hosking",
title = "Compiling a high-level language for {GPUs}: (via
language support for architectures and compilers)",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "1--12",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254066",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Languages such as OpenCL and CUDA offer a standard
interface for general-purpose programming of GPUs.
However, with these languages, programmers must
explicitly manage numerous low-level details involving
communication and synchronization. This burden makes
programming GPUs difficult and error-prone, rendering
these powerful devices inaccessible to most
programmers. We desire a higher-level programming model
that makes GPUs more accessible while also effectively
exploiting their computational power. This paper
presents features of Lime, a new Java-compatible
language targeting heterogeneous systems, that allow an
optimizing compiler to generate high quality GPU code.
The key insight is that the language type system
enforces isolation and immutability invariants that
allow the compiler to optimize for a GPU without heroic
compiler analysis. Our compiler attains GPU speedups
between 75\% and 140\% of the performance of native
OpenCL code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Samadi:2012:AIA,
author = "Mehrzad Samadi and Amir Hormati and Mojtaba Mehrara
and Janghaeng Lee and Scott Mahlke",
title = "Adaptive input-aware compilation for graphics
engines",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "13--22",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254067",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "While graphics processing units (GPUs) provide
low-cost and efficient platforms for accelerating high
performance computations, the tedious process of
performance tuning required to optimize applications is
an obstacle to wider adoption of GPUs. In addition to
the programmability challenges posed by GPU's complex
memory hierarchy and parallelism model, a well-known
application design problem is target portability across
different GPUs. However, even for a single GPU target,
changing a program's input characteristics can make an
already-optimized implementation of a program perform
poorly. In this work, we propose Adaptic, an adaptive
input-aware compilation system to tackle this
important, yet overlooked, input portability problem.
Using this system, programmers develop their
applications in a high-level streaming language and let
Adaptic undertake the difficult task of input portable
optimizations and code generation. Several input-aware
optimizations are introduced to make efficient use of
the memory hierarchy and customize thread composition.
At runtime, a properly optimized version of the
application is executed based on the actual program
input. We perform a head-to-head comparison between the
Adaptic generated and hand-optimized CUDA programs. The
results show that Adaptic is capable of generating
codes that can perform on par with their hand-optimized
counterparts over certain input ranges and outperform
them when the input falls out of the hand-optimized
programs' ``comfort zone''. Furthermore, we show that
input-aware results are sustainable across different
GPU targets making it possible to write and optimize
applications once and run them anywhere.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
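
At run time, input-aware compilation reduces to dispatching among
pre-generated variants on actual input properties. The invented C
sketch below shows only that dispatch, with a made-up size threshold;
Adaptic itself generates and selects optimized CUDA variants.

/* Input-aware dispatch: pick the kernel variant suited to the actual
 * input size.  Both variants are deliberately trivial stand-ins. */
#include <stddef.h>
#include <stdio.h>

typedef void (*kernel_fn)(const float *in, float *out, size_t n);

static void kernel_small(const float *in, float *out, size_t n)
{ for (size_t i = 0; i < n; i++) out[i] = in[i] * 2.0f; }

static void kernel_large(const float *in, float *out, size_t n)
{ for (size_t i = 0; i < n; i++) out[i] = in[i] * 2.0f; /* would tile */ }

static kernel_fn select_variant(size_t n)
{
    return n < 4096 ? kernel_small : kernel_large;  /* input-aware */
}

int main(void)
{
    float in[8] = {1, 2, 3, 4, 5, 6, 7, 8}, out[8];
    select_variant(8)(in, out, 8);
    printf("out[7] = %g\n", out[7]);
    return 0;
}
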
@Article{Bacon:2012:TTW,
author = "David F. Bacon and Perry Cheng and Sunil Shukla",
title = "And then there were none: a stall-free real-time
garbage collector for reconfigurable hardware",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "23--34",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254068",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Programmers are turning to radical architectures such
as reconfigurable hardware (FPGAs) to achieve
performance. But such systems, programmed at a very low
level in languages with impoverished abstractions, are
orders of magnitude more complex to use than
conventional CPUs. The continued exponential increase
in transistors, combined with the desire to implement
ever more sophisticated algorithms, makes it imperative
that such systems be programmed at much higher levels
of abstraction. One of the fundamental high-level
language features is automatic memory management in the
form of garbage collection. We present the first
implementation of a complete garbage collector in
hardware (as opposed to previous ``hardware-assist''
techniques), using an FPGA and its on-chip memory.
Using a completely concurrent snapshot algorithm, it
provides single-cycle access to the heap, and never
stalls the mutator for even a single cycle, achieving a
deterministic mutator utilization (MMU) of 100\%. We
have synthesized the collector to hardware and show
that it never consumes more than 1\% of the logic
resources of a high-end FPGA. For comparison we also
implemented explicit (malloc/free) memory management,
and show that real-time collection is about 4\% to 17\%
slower than malloc, with comparable energy consumption.
Surprisingly, in hardware real-time collection is
superior to stop-the-world collection on every
performance axis, and even for stressful
micro-benchmarks can achieve 100\% MMU with heaps as
small as 1.01 to 1.4 times the absolute minimum.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Oliveira:2012:ICN,
author = "Bruno C. d. S. Oliveira and Tom Schrijvers and Wontae
Choi and Wonchan Lee and Kwangkeun Yi",
title = "The implicit calculus: a new foundation for generic
programming",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "35--44",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254070",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Generic programming (GP) is an increasingly important
trend in programming languages. Well-known GP
mechanisms, such as type classes and the C++0x concepts
proposal, usually combine two features: (1) a special
type of interfaces; and (2) implicit instantiation of
implementations of those interfaces. Scala implicits
are a GP language mechanism, inspired by type classes,
that break with the tradition of coupling implicit
instantiation with a special type of interface.
Instead, implicits provide only implicit instantiation,
which is generalized to work for any types. This turns
out to be quite powerful and useful to address many
limitations that show up in other GP mechanisms. This
paper synthesizes the key ideas of implicits formally
in a minimal and general core calculus called the
implicit calculus $ (\lambda \implies) $, and it shows
how to build source languages supporting implicit
instantiation on top of it. A novelty of the calculus
is its support for partial resolution and higher-order
rules (a feature that has been proposed before, but was
never formalized or implemented). Ultimately, the
implicit calculus provides a formal model of implicits,
which can be used by language designers to study and
inform implementations of similar mechanisms in their
own languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kawaguchi:2012:DPL,
author = "Ming Kawaguchi and Patrick Rondon and Alexander Bakst
and Ranjit Jhala",
title = "Deterministic parallelism via liquid effects",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "45--54",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254071",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Shared memory multithreading is a popular approach to
parallel programming, but also fiendishly hard to get
right. We present Liquid Effects, a type-and-effect
system based on refinement types which allows for
fine-grained, low-level, shared memory multi-threading
while statically guaranteeing that a program is
deterministic. Liquid Effects records the effect of an
                 expression as a formula in first-order logic, making
our type-and-effect system highly expressive. Further,
effects like Read and Write are recorded in Liquid
Effects as ordinary uninterpreted predicates, leaving
the effect system open to extension by the user. By
building our system as an extension to an existing
dependent refinement type system, our system gains
precise value- and branch-sensitive reasoning about
effects. Finally, our system exploits the Liquid Types
refinement type inference technique to automatically
infer refinement types and effects. We have implemented
our type-and-effect checking techniques in CSOLVE, a
refinement type inference system for C programs. We
demonstrate how CSOLVE uses Liquid Effects to prove the
determinism of a variety of benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Guerraoui:2012:SL,
author = "Rachid Guerraoui and Viktor Kuncak and Giuliano Losa",
title = "Speculative linearizability",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "55--66",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254072",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Linearizability is a key design methodology for
reasoning about implementations of concurrent abstract
data types in both shared memory and message passing
systems. It provides the illusion that operations
execute sequentially and fault-free, despite the
asynchrony and faults inherent to a concurrent system,
especially a distributed one. A key property of
linearizability is inter-object composability: a system
composed of linearizable objects is itself
linearizable. However, devising linearizable objects is
very difficult, requiring complex algorithms to work
correctly under general circumstances, and often
resulting in bad average-case behavior. Concurrent
algorithm designers therefore resort to speculation:
optimizing algorithms to handle common scenarios more
                 efficiently. The outcome is even more complex
protocols, for which it is no longer tractable to prove
their correctness. To simplify the design of efficient
yet robust linearizable protocols, we propose a new
notion: speculative linearizability. This property is
as general as linearizability, yet it allows
intra-object composability: the correctness of
independent protocol phases implies the correctness of
their composition. In particular, it allows the
designer to focus solely on the proof of an
optimization and derive the correctness of the overall
protocol from the correctness of the existing,
non-optimized one. Our notion of protocol phases allows
processes to independently switch from one phase to
another, without requiring them to reach agreement to
determine the change of a phase. To illustrate the
applicability of our methodology, we show how examples
of speculative algorithms for shared memory and
asynchronous message passing naturally fit into our
framework. We rigorously define speculative
linearizability and prove our intra-object composition
theorem in a trace-based as well as an automaton-based
model. To obtain a further degree of confidence, we
also formalize and mechanically check the theorem in
the automaton-based model, using the I/O automata
framework within the Isabelle interactive proof
assistant. We expect our framework to enable, for the
first time, scalable specifications and mechanical
proofs of speculative implementations of linearizable
objects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zaparanuks:2012:AP,
author = "Dmitrijs Zaparanuks and Matthias Hauswirth",
title = "Algorithmic profiling",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "67--76",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254074",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Traditional profilers identify where a program spends
most of its resources. They do not provide information
about why the program spends those resources or about
how resource consumption would change for different
program inputs. In this paper we introduce the idea of
algorithmic profiling. While a traditional profiler
determines a set of measured cost values, an
algorithmic profiler determines a cost function. It
does that by automatically determining the ``inputs''
of a program, by measuring the program's ``cost'' for
any given input, and by inferring an empirical cost
function.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
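
A minimal sketch of the idea in Python, under illustrative
assumptions (a hand-instrumented cost counter and three candidate
growth rates; none of these names come from the paper's tool):
measure a routine's cost at several input sizes, then pick the
candidate cost function with the best least-squares fit.

    import math, random

    def insertion_sort(xs):
        # Routine under study; "cost" counts its basic operations.
        cost = 0
        for i in range(1, len(xs)):
            j = i
            cost += 1
            while j > 0 and xs[j-1] > xs[j]:
                xs[j-1], xs[j] = xs[j], xs[j-1]
                j -= 1
                cost += 1
        return cost

    MODELS = {
        "n": lambda n: n,
        "n log n": lambda n: n * math.log(n),
        "n^2": lambda n: n * n,
    }

    def fit(samples):
        # Least-squares fit of cost ~ a * f(n) through the origin;
        # return the model with the smallest squared error.
        best = None
        for name, f in MODELS.items():
            a = (sum(c * f(n) for n, c in samples) /
                 sum(f(n) ** 2 for n, _ in samples))
            err = sum((c - a * f(n)) ** 2 for n, c in samples)
            if best is None or err < best[0]:
                best = (err, name)
        return best[1]

    samples = [(n, insertion_sort(random.sample(range(n), n)))
               for n in (100, 200, 400, 800)]
    print(fit(samples))   # typically "n^2" for insertion sort
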
@Article{Jin:2012:UDR,
author = "Guoliang Jin and Linhai Song and Xiaoming Shi and Joel
Scherpelz and Shan Lu",
title = "Understanding and detecting real-world performance
bugs",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "77--88",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254075",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Developers frequently use inefficient code sequences
that could be fixed by simple patches. These
inefficient code sequences can cause significant
performance degradation and resource waste, referred to
as performance bugs. Meager increases in single
threaded performance in the multi-core era and
increasing emphasis on energy efficiency call for more
effort in tackling performance bugs. This paper
conducts a comprehensive study of 110 real-world
performance bugs that are randomly sampled from five
representative software suites (Apache, Chrome, GCC,
Mozilla, and MySQL). The findings of this study provide
guidance for future work to avoid, expose, detect, and
fix performance bugs. Guided by our characteristics
study, efficiency rules are extracted from 25 patches
and are used to detect performance bugs. A total of 332
previously unknown performance problems are found in the
latest
versions of MySQL, Apache, and Mozilla applications,
including 219 performance problems found by applying
rules across applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Coppa:2012:ISP,
author = "Emilio Coppa and Camil Demetrescu and Irene Finocchi",
title = "Input-sensitive profiling",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "89--98",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254076",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "In this paper we present a profiling methodology and
toolkit for helping developers discover hidden
asymptotic inefficiencies in the code. From one or more
runs of a program, our profiler automatically measures
how the performance of individual routines scales as a
function of the input size, yielding clues to their
growth rate. The output of the profiler is, for each
executed routine of the program, a set of tuples that
aggregate performance costs by input size. The
collected profiles can be used to produce performance
plots and derive trend functions by statistical curve
fitting or bounding techniques. A key feature of our
method is the ability to automatically measure the size
of the input given to a generic code fragment: to this
aim, we propose an effective metric for estimating the
input size of a routine and show how to compute it
efficiently. We discuss several case studies, showing
that our approach can reveal asymptotic bottlenecks
that other profilers may fail to detect, and can
characterize the workload and behavior of individual
routines in the context of real applications. To prove
the feasibility of our techniques, we implemented a
Valgrind tool called aprof and performed an extensive
experimental evaluation on the SPEC CPU2006 benchmarks.
Our experiments show that aprof delivers comparable
performance to other prominent Valgrind tools, and can
generate informative plots even from single runs on
typical workloads for most algorithmically-critical
routines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
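
A hedged sketch of the input-size metric (our reading of the
abstract: a routine's input size is the number of distinct memory
cells it reads before ever writing them; the class and routine below
are illustrative, not aprof's implementation):

    class TracedMemory:
        def __init__(self):
            self.mem = {}
            self.first_access = {}   # location -> "r" or "w"

        def read(self, loc):
            self.first_access.setdefault(loc, "r")
            return self.mem.get(loc, 0)

        def write(self, loc, val):
            self.first_access.setdefault(loc, "w")
            self.mem[loc] = val

        def input_size(self):
            # Cells first read before written form the routine's input.
            return sum(1 for a in self.first_access.values() if a == "r")

    def sum_range(m, base, n):
        total = 0
        for i in range(n):
            total += m.read(base + i)
        m.write(base + n, total)     # output cell, not part of the input
        return total

    m = TracedMemory()
    sum_range(m, 0, 10)
    print(m.input_size())            # -> 10
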
@Article{Zhang:2012:LBC,
author = "Danfeng Zhang and Aslan Askarov and Andrew C. Myers",
title = "Language-based control and mitigation of timing
channels",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "99--110",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254078",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "We propose a new language-based approach to mitigating
timing channels. In this language, well-typed programs
provably leak only a bounded amount of information over
time through external timing channels. By incorporating
mechanisms for predictive mitigation of timing
channels, this approach also permits a more expressive
programming model. Timing channels arising from
interaction with underlying hardware features such as
instruction caches are controlled. Assumptions about
the underlying hardware are explicitly formalized,
supporting the design of hardware that efficiently
controls timing channels. One such hardware design is
modeled and used to show that timing channels can be
controlled in some simple programs of real-world
significance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
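
The predictive mitigation this work builds on can be sketched as
follows (a toy with illustrative parameters, not the paper's type
system: outputs are delayed to predicted release times, and a
misprediction doubles the prediction interval, so observable timing
reveals at most the logarithmic number of mispredictions):

    import time

    class Mitigator:
        def __init__(self, quantum=0.01):
            self.quantum = quantum                  # prediction interval
            self.next_release = time.monotonic() + quantum

        def release(self, value):
            now = time.monotonic()
            if now > self.next_release:             # misprediction:
                self.quantum *= 2                   # double the schedule
                self.next_release = now + self.quantum
            time.sleep(max(0.0, self.next_release - now))
            self.next_release += self.quantum
            return value

    m = Mitigator()
    for secret_delay in (0.001, 0.003, 0.002):
        time.sleep(secret_delay)     # work whose duration is secret
        m.release("response")        # released on the predicted schedule
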
@Article{Chiw:2012:DPD,
author = "Charisee Chiw and Gordon Kindlmann and John Reppy and
Lamont Samuels and Nick Seltzer",
title = "{Diderot}: a parallel {DSL} for image analysis and
visualization",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "111--120",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254079",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Research scientists and medical professionals use
imaging technology, such as computed tomography (CT)
and magnetic resonance imaging (MRI) to measure a wide
variety of biological and physical objects. The
increasing sophistication of imaging technology creates
demand for equally sophisticated computational
techniques to analyze and visualize the image data.
Analysis and visualization codes are often crafted for
a specific experiment or set of images; thus, imaging
scientists need support for quickly developing codes
that are reliable, robust, and efficient. In this
paper, we present the design and implementation of
Diderot, which is a parallel domain-specific language
for biomedical image analysis and visualization.
Diderot supports a high-level model of computation that
is based on continuous tensor fields. These tensor
fields are reconstructed from discrete image data using
separable convolution kernels, but may also be defined
by applying higher-order operations, such as
differentiation ({$ \nabla $}). Early experiments
demonstrate that Diderot provides both a high-level
concise notation for image analysis and visualization
algorithms, as well as high sequential and parallel
performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
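
The computational model can be illustrated in a few lines (a 1-D toy
with a tent kernel; Diderot itself works on multidimensional tensor
fields with higher-order kernels): a continuous field F(x) =
sum_i V[i] k(x - i) is reconstructed from discrete samples V by
convolution, and its derivative comes from convolving with k'.

    def tent(t):                       # linear-interpolation kernel
        return max(0.0, 1.0 - abs(t))

    def tent_deriv(t):
        if -1.0 < t < 0.0: return 1.0
        if 0.0 < t < 1.0: return -1.0
        return 0.0

    def field(V, x, k):                # F(x) = sum_i V[i] * k(x - i)
        return sum(v * k(x - i) for i, v in enumerate(V))

    V = [0.0, 1.0, 4.0, 9.0, 16.0]     # samples of x^2 at the integers
    print(field(V, 2.5, tent))         # -> 6.5, the reconstructed value
    print(field(V, 2.5, tent_deriv))   # -> 5.0, the reconstructed slope
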
@Article{Cartey:2012:SGC,
author = "Luke Cartey and Rune Lyngs{\o} and Oege de Moor",
title = "Synthesising graphics card programs from {DSLs}",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "121--132",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254080",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Over the last five years, graphics cards have become a
tempting target for scientific computing, thanks to
unrivaled peak performance, often producing a runtime
speed-up of 10x to 25x over comparable CPU solutions.
However, this increase can be difficult to achieve, and
doing so often requires a fundamental rethink. This is
especially problematic in scientific computing, where
experts do not want to learn yet another architecture.
In this paper we develop a method for automatically
parallelising recursive functions of the sort found in
scientific papers. Using a static analysis of the
function dependencies we identify sets --- partitions
--- of independent elements, which we use to synthesise
an efficient GPU implementation using polyhedral code
generation techniques. We then augment our language
with DSL extensions to support a wider variety of
applications, and demonstrate the effectiveness of this
with three case studies, showing significant
performance improvement over equivalent CPU methods,
and similar efficiency to hand-tuned GPU
implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
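
The partitioning step can be pictured with a toy recurrence (our
illustration, not the paper's analysis): for a table where cell
(i, j) depends on (i-1, j), (i, j-1), and (i-1, j-1), all cells on
one anti-diagonal are mutually independent, so each partition can be
executed as a single data-parallel GPU step.

    N = 4
    partitions = {}
    for i in range(N):
        for j in range(N):
            # cells with equal i + j share no dependence
            partitions.setdefault(i + j, []).append((i, j))

    for level in sorted(partitions):     # sequential across partitions,
        print(level, partitions[level])  # parallel within each one
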
@Article{Raman:2012:PSF,
author = "Arun Raman and Ayal Zaks and Jae W. Lee and David I.
August",
title = "{Parcae}: a system for flexible parallel execution",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "133--144",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254082",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Workload, platform, and available resources constitute
a parallel program's execution environment. Most
parallelization efforts statically target an
anticipated range of environments, but performance
generally degrades outside that range. Existing
approaches address this problem with dynamic tuning but
do not optimize a multiprogrammed system holistically.
Further, they either require manual programming effort
or are limited to array-based data-parallel programs.
This paper presents Parcae, a generally applicable
automatic system for platform-wide dynamic tuning.
Parcae includes (i) the Nona compiler, which creates
flexible parallel programs whose tasks can be
efficiently reconfigured during execution; (ii) the
Decima monitor, which measures resource availability
and system performance to detect change in the
environment; and (iii) the Morta executor, which cuts
short the life of executing tasks, replacing them with
other functionally equivalent tasks better suited to
the current environment. Parallel programs made
flexible by Parcae outperform original parallel
implementations in many interesting scenarios.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tripp:2012:JEP,
author = "Omer Tripp and Roman Manevich and John Field and Mooly
Sagiv",
title = "{JANUS}: exploiting parallelism via hindsight",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "145--156",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254083",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "This paper addresses the problem of reducing
unnecessary conflicts in optimistic synchronization.
Optimistic synchronization must ensure that any two
concurrently executing transactions that commit are
properly synchronized. Conflict detection is an
approximate check for this condition. For efficiency,
the traditional approach to conflict detection
conservatively checks that the memory locations
mutually accessed by two concurrent transactions are
accessed only for reading. We present JANUS, a
parallelization system that performs conflict detection
by considering sequences of operations and their
composite effect on the system's state. This is done
efficiently, such that the runtime overhead due to
conflict detection is on a par with that of
write-conflict-based detection. In certain common
scenarios, this mode of refinement dramatically
improves the precision of conflict detection, thereby
reducing the number of false conflicts. Our empirical
evaluation of JANUS shows that this precision gain
reduces the abort rate by an order of magnitude (22x on
average), and achieves a speedup of up to 2.5x, on a
suite of real-world benchmarks where no parallelism is
exploited by the standard approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
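
The contrast with read/write-set checking can be sketched as follows
(illustrative, not JANUS's actual machinery): two transactions
conflict only if their operation sequences fail to commute on the
current state, so two inserts into the same set do not conflict even
though both write it.

    def commutes(state, ops1, ops2):
        def run(s, ops):
            s = set(s)
            for op in ops:
                op(s)
            return s
        return run(run(state, ops1), ops2) == run(run(state, ops2), ops1)

    s = {1, 2}
    t1 = [lambda s: s.add(3)]
    t2 = [lambda s: s.add(4)]
    t3 = [lambda s: s.clear()]
    print(commutes(s, t1, t2))   # True: both write, yet no real conflict
    print(commutes(s, t1, t3))   # False: order matters, a real conflict
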
@Article{Turon:2012:REC,
author = "Aaron Turon",
title = "{Reagents}: expressing and composing fine-grained
concurrency",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "157--168",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254084",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Efficient communication and synchronization is crucial
for fine grained parallelism. Libraries providing such
features, while indispensable, are difficult to write,
and often cannot be tailored or composed to meet the
needs of specific users. We introduce reagents, a set
of combinators for concisely expressing concurrency
algorithms. Reagents scale as well as their hand-coded
counterparts, while providing the composability
existing libraries lack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
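
The flavor of the combinators can be sketched in Python (the real
library is in Scala, and composed reagents there commit atomically
via multi-word compare-and-set; this toy retries each step
independently and every name is illustrative):

    import threading

    class Ref:
        def __init__(self, v):
            self.v, self.lock = v, threading.Lock()
        def cas(self, old, new):           # compare-and-set
            with self.lock:
                if self.v == old:
                    self.v = new
                    return True
                return False

    class Reagent:
        def __init__(self, attempt):       # attempt: arg -> (ok, result)
            self.attempt = attempt
        def run(self, arg=None):           # retry on conflict
            while True:
                ok, res = self.attempt(arg)
                if ok:
                    return res
        def then(self, other):             # sequencing combinator
            def attempt(arg):
                ok, mid = self.attempt(arg)
                return other.attempt(mid) if ok else (False, None)
            return Reagent(attempt)

    def upd(ref, f):                       # lift an update into a reagent
        def attempt(arg):
            old = ref.v
            new, out = f(old, arg)
            return (True, out) if ref.cas(old, new) else (False, None)
        return Reagent(attempt)

    counter = Ref(0)
    inc = upd(counter, lambda old, _: (old + 1, old))
    double = upd(counter, lambda old, _: (old * 2, old))
    inc.then(double).run()
    print(counter.v)                       # -> 2
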
@Article{Carbin:2012:PAP,
author = "Michael Carbin and Deokhwan Kim and Sasa Misailovic
and Martin C. Rinard",
title = "Proving acceptability properties of relaxed
nondeterministic approximate programs",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "169--180",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254086",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Approximate program transformations such as skipping
tasks [29, 30], loop perforation [21, 22, 35],
reduction sampling [38], multiple selectable
implementations [3, 4, 16, 38], dynamic knobs [16],
synchronization elimination [20, 32], approximate
function memoization [11], and approximate data types
[34] produce programs that can execute at a variety of
points in an underlying performance versus accuracy
tradeoff space. These transformed programs have the
ability to trade accuracy of their results for
increased performance by dynamically and
nondeterministically modifying variables that control
their execution. We call such transformed programs
relaxed programs because they have been extended with
additional nondeterminism to relax their semantics and
enable greater flexibility in their execution. We
present language constructs for developing and
specifying relaxed programs. We also present proof
rules for reasoning about properties [28] which the
program must satisfy to be acceptable. Our proof rules
work with two kinds of acceptability properties:
relational acceptability properties [28], which characterize
desired relationships between the values of variables
in the original and relaxed programs, and unary
acceptability properties, which involve values only
from a single (original or relaxed) program. The proof
rules support a staged reasoning approach in which the
majority of the reasoning effort works with the
original program. Exploiting the common structure that
the original and relaxed programs share, relational
reasoning transfers reasoning effort from the original
program to prove properties of the relaxed program. We
have formalized the dynamic semantics of our target
programming language and the proof rules in Coq and
verified that the proof rules are sound with respect to
the dynamic semantics. Our Coq implementation enables
developers to obtain fully machine-checked
verifications of their relaxed programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
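
A relaxed program and one acceptability property can be pictured as
follows (an illustration in Python, checked dynamically; the paper's
contribution is proving such properties statically, in Coq):

    import random

    def mean(xs):                       # original program
        return sum(xs) / len(xs)

    def mean_relaxed(xs, skip=0.25):    # loop perforation: skip work
        kept = [x for x in xs if random.random() > skip] or xs[:1]
        return sum(kept) / len(kept)

    xs = [random.uniform(0, 1) for _ in range(10_000)]
    exact, approx = mean(xs), mean_relaxed(xs)

    # Acceptability property relating original and relaxed results:
    assert abs(exact - approx) <= 0.05, "acceptability violated"
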
@Article{Dillig:2012:AED,
author = "Isil Dillig and Thomas Dillig and Alex Aiken",
title = "Automated error diagnosis using abductive inference",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "181--192",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254087",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "When program verification tools fail to verify a
program, either the program is buggy or the report is a
false alarm. In this situation, the burden is on the
user to manually classify the report, but this task is
time-consuming, error-prone, and does not utilize facts
already proven by the analysis. We present a new
technique for assisting users in classifying error
reports. Our technique computes small, relevant queries
presented to a user that capture exactly the
information the analysis is missing to either discharge
or validate the error. Our insight is that identifying
these missing facts is an instance of the abductive
inference problem in logic, and we present a new
algorithm for computing the smallest and most general
abductions in this setting. We perform the first user
study to rigorously evaluate the accuracy and effort
involved in manual classification of error reports. Our
study demonstrates that our new technique is very
useful for improving both the speed and accuracy of
error report classification. Specifically, our approach
improves classification accuracy from 33\% to 90\% and
reduces the time programmers take to classify error
reports from approximately 5 minutes to under 1
minute.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
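
The abduction problem itself is easy to state concretely (a
brute-force propositional toy, smallest-first; the paper works over
richer logics with a real decision procedure):

    from itertools import combinations, product

    VARS = ["x_pos", "y_pos"]

    def worlds():
        for bits in product([False, True], repeat=len(VARS)):
            yield dict(zip(VARS, bits))

    def abduce(premise, goal, candidates):
        # Find a smallest A with (premise and A) satisfiable and
        # (premise and A) entailing goal.
        for size in range(len(candidates) + 1):
            for A in combinations(candidates, size):
                ok = [w for w in worlds()
                      if premise(w) and all(a(w) for _, a in A)]
                if ok and all(goal(w) for w in ok):
                    return [name for name, _ in A]
        return None

    premise = lambda w: True                      # nothing proven yet
    goal = lambda w: w["x_pos"] or w["y_pos"]     # discharges the error
    candidates = [("x_pos", lambda w: w["x_pos"]),
                  ("y_pos", lambda w: w["y_pos"])]
    print(abduce(premise, goal, candidates))      # -> ['x_pos']
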
@Article{Kuznetsov:2012:ESM,
author = "Volodymyr Kuznetsov and Johannes Kinder and Stefan
Bucur and George Candea",
title = "Efficient state merging in symbolic execution",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "193--204",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254088",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Symbolic execution has proven to be a practical
technique for building automated test case generation
and bug finding tools. Nevertheless, due to state
explosion, these tools still struggle to achieve
scalability. Given a program, one way to reduce the
number of states that the tools need to explore is to
merge states obtained on different paths. Alas, doing
so increases the size of symbolic path conditions
(thereby stressing the underlying constraint solver)
and interferes with optimizations of the exploration
process (also referred to as search strategies). The
net effect is that state merging may actually lower
performance rather than increase it. We present a way
to automatically choose when and how to merge states
such that the performance of symbolic execution is
significantly increased. First, we present query count
estimation, a method for statically estimating the
impact that each symbolic variable has on solver
queries that follow a potential merge point; states are
then merged only when doing so promises to be
advantageous. Second, we present dynamic state merging,
a technique for merging states that interacts favorably
with search strategies in automated test case
generation and bug finding tools. Experiments on the 96
GNU Coreutils show that our approach consistently
achieves several orders of magnitude speedup over
previously published results. Our code and experimental
data are publicly available at http://cloud9.epfl.ch.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
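
The merge itself is simple to picture (a sketch with tuple-encoded
terms; the cost model that decides when to merge is the paper's real
contribution): differing variables fold into ite terms and the path
conditions are disjoined.

    def ite(c, a, b):
        return a if a == b else ("ite", c, a, b)

    def merge(cond_a, state_a, cond_b, state_b):
        merged = {}
        for var in state_a.keys() | state_b.keys():
            merged[var] = ite(cond_a, state_a.get(var), state_b.get(var))
        return ("or", cond_a, cond_b), merged

    # if (x > 0) y = 1; else y = 2;   -- both paths reach the join
    pc, st = merge(("gt", "x", 0), {"y": 1, "z": 5},
                   ("le", "x", 0), {"y": 2, "z": 5})
    print(pc)   # ('or', ('gt', 'x', 0), ('le', 'x', 0))
    print(st)   # {'y': ('ite', ('gt', 'x', 0), 1, 2), 'z': 5}
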
@Article{Wu:2012:SPA,
author = "Jingyue Wu and Yang Tang and Gang Hu and Heming Cui
and Junfeng Yang",
title = "Sound and precise analysis of parallel programs
through schedule specialization",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "205--216",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254090",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Parallel programs are known to be difficult to
analyze. A key reason is that they typically have an
enormous number of execution interleavings, or
schedules. Static analysis over all schedules requires
over-approximations, resulting in poor precision;
dynamic analysis rarely covers more than a tiny
fraction of all schedules. We propose an approach
called schedule specialization to analyze a parallel
program over only a small set of schedules for
precision, and then enforce these schedules at runtime
for soundness of the static analysis results. We build
a schedule specialization framework for C/C++
multithreaded programs that use Pthreads. Our framework
avoids the need to modify every analysis to be
schedule-aware by specializing a program into a simpler
program based on a schedule, so that the resultant
program can be analyzed with stock analyses for
improved precision. Moreover, our framework provides a
precise schedule-aware def-use analysis on memory
locations, enabling us to build three highly precise
analyses: an alias analyzer, a data-race detector, and
a path slicer. Evaluation on 17 programs, including 2
real-world programs and 15 popular benchmarks, shows
that analyses using our framework reduced may-aliases
by 61.9\%, false race reports by 69\%, and path slices
by 48.7\%; and detected 7 unknown bugs in well-checked
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Albarghouthi:2012:PTI,
author = "Aws Albarghouthi and Rahul Kumar and Aditya V. Nori
and Sriram K. Rajamani",
title = "Parallelizing top-down interprocedural analyses",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "217--228",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254091",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Modularity is a central theme in any scalable program
analysis. The core idea in a modular analysis is to
build summaries at procedure boundaries, and use the
summary of a procedure to analyze the effect of calling
it at its calling context. There are two ways to
perform a modular program analysis: (1) top-down and
(2) bottom-up. A bottom-up analysis proceeds upwards
from the leaves of the call graph, and analyzes each
procedure in the most general calling context and
builds its summary. In contrast, a top-down analysis
starts from the root of the call graph, and proceeds
downward, analyzing each procedure in its calling
context. Top-down analyses have several applications in
verification and software model checking. However,
traditionally, bottom-up analyses have been easier to
scale and parallelize than top-down analyses. In this
paper, we propose a generic framework, BOLT, which uses
MapReduce style parallelism to scale top-down analyses.
In particular, we consider top-down analyses that are
demand driven, such as the ones used for software model
checking. In such analyses, each intraprocedural
analysis happens in the context of a reachability
query. A query Q over a procedure P results in a query
tree that consists of sub-queries over the procedures
called by P. The key insight in BOLT is that the query
tree can be explored in parallel using MapReduce style
parallelism --- the map stage can be used to run a set
of enabled queries in parallel, and the reduce stage
can be used to manage inter-dependencies between
queries. Iterating the map and reduce stages
alternately, we can exploit the parallelism inherent in
top-down analyses. Another unique feature of BOLT is
that it is parameterized by the algorithm used for
intraprocedural analysis. Several kinds of analyses,
including may analyses, must analyses, and
may-must-analyses can be parallelized using BOLT. We
have implemented the BOLT framework and instantiated
the intraprocedural parameter with a may-must-analysis.
We have run BOLT on a test suite consisting of 45
Microsoft Windows device drivers and 150 safety
properties. Our results demonstrate an average speedup
of 3.71x and a maximum speedup of 7.4x (with 8 cores)
over a sequential analysis. Moreover, in several checks
where a sequential analysis fails, BOLT is able to
successfully complete its analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
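
The map/reduce iteration can be sketched as follows (a toy with a
stubbed intraprocedural analysis and a hypothetical call graph; BOLT
itself is parameterized by the analysis):

    from concurrent.futures import ThreadPoolExecutor

    CALLS = {"main": ["parse", "eval"], "eval": ["parse"]}

    def analyze(query, answers):
        # Stub: a query either needs sub-queries answered first or
        # returns a summary.
        subs = [q for q in CALLS.get(query, []) if q not in answers]
        if subs:
            return ("needs", subs)
        return ("answer", "summary(" + query + ")")

    answers, pending = {}, {"main"}
    with ThreadPoolExecutor() as pool:
        while pending:
            qs = list(pending)
            # map stage: run all enabled queries in parallel
            results = zip(qs, pool.map(lambda q: analyze(q, answers), qs))
            # reduce stage: record answers, manage inter-dependencies
            nxt = set()
            for q, (tag, val) in results:
                if tag == "answer":
                    answers[q] = val
                else:
                    nxt.add(q)
                    nxt.update(val)
            pending = {q for q in nxt if q not in answers}
    print(answers["main"])
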
@Article{Oh:2012:DIS,
author = "Hakjoo Oh and Kihong Heo and Wonchan Lee and Woosuk
Lee and Kwangkeun Yi",
title = "Design and implementation of sparse global analyses
for {C}-like languages",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "229--238",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254092",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "In this article we present a general method for
achieving global static analyzers that are precise,
sound, yet also scalable. Our method generalizes the
sparse analysis techniques on top of the abstract
interpretation framework to support relational as well
as non-relational semantics properties for C-like
languages. We first use the abstract interpretation
framework to obtain a global static analyzer whose
scalability is not yet addressed. Upon this underlying sound
static analyzer, we add our generalized sparse analysis
techniques to improve its scalability while preserving
the precision of the underlying analysis. Our framework
determines what to prove to guarantee that the
resulting sparse version should preserve the precision
of the underlying analyzer. We formally present our
framework; we show that existing sparse analyses are
all restricted instances of our framework; we give more
semantically elaborate design examples of sparse
non-relational and relational static analyses; and we
present implementation results that scale to analyzing
up to one million lines of C programs. We also
show a set of implementation techniques that turn out
to be critical to economically support the sparse
analysis process.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hackett:2012:FPH,
author = "Brian Hackett and Shu-yu Guo",
title = "Fast and precise hybrid type inference for
{JavaScript}",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "239--250",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254094",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "JavaScript performance is often bound by its
dynamically typed nature. Compilers do not have access
to static type information, making generation of
efficient, type-specialized machine code difficult. We
seek to solve this problem by inferring types. In this
paper we present a hybrid type inference algorithm for
JavaScript based on points-to analysis. Our algorithm
is fast, in that it pays for itself in the
optimizations it enables. Our algorithm is also
precise, generating information that closely reflects
the program's actual behavior even when analyzing
polymorphic code, by augmenting static analysis with
run-time type barriers. We showcase an implementation
for Mozilla Firefox's JavaScript engine, demonstrating
both performance gains and viability. Through
integration with the just-in-time (JIT) compiler in
Firefox, we have improved performance on major
benchmarks and JavaScript-heavy websites by up to 50\%.
Inference-enabled compilation is the default
compilation mode as of Firefox 9.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
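
The hybrid of static inference and run-time checks can be pictured
with a toy type barrier (illustrative only; the real system guards
JIT-compiled code and triggers recompilation):

    class TypeSet:
        def __init__(self):
            self.types = set()
        def barrier(self, value):
            t = type(value).__name__
            if t not in self.types:     # unexpected type observed:
                self.types.add(t)       # widen (a JIT would deoptimize)
            return value

    x_types = TypeSet()
    def assign_x(v):                    # every write to x is guarded
        return x_types.barrier(v)

    assign_x(1); assign_x(2)            # code specialized for {int}
    assign_x("s")                       # barrier widens to {int, str}
    print(x_types.types)
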
@Article{Petrov:2012:RDW,
author = "Boris Petrov and Martin Vechev and Manu Sridharan and
Julian Dolby",
title = "Race detection for {Web} applications",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "251--262",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254095",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Modern web pages are becoming increasingly
full-featured, and this additional functionality often
requires greater use of asynchrony. Unfortunately, this
asynchrony can trigger unexpected concurrency errors,
even though web page scripts are executed sequentially.
We present the first formulation of a happens-before
relation for common web platform features. Developing
this relation was a non-trivial task, due to complex
feature interactions and browser differences. We also
present a logical memory access model for web
applications that abstracts away browser implementation
details. Based on the above, we implemented WebRacer,
the first dynamic race detector for web applications.
WebRacer is implemented atop the production-quality
WebKit engine, enabling testing of full-featured web
sites. WebRacer can also simulate certain user actions,
exposing more races. We evaluated WebRacer by testing a
large set of Fortune 100 company web sites. We
discovered many harmful races, and also gained insights
into how developers handle asynchrony in practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Fischer:2012:EDM,
author = "Jeffrey Fischer and Rupak Majumdar and Shahram
Esmaeilsabzali",
title = "{Engage}: a deployment management system",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "263--274",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254096",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Many modern applications are built by combining
independently developed packages and services that are
distributed over many machines with complex
inter-dependencies. The assembly, installation, and
management of such applications is hard, and usually
performed either manually or by writing customized
scripts. We present Engage, a system for configuring,
installing, and managing complex application stacks.
Engage consists of three components: a domain-specific
model to describe component metadata and
inter-component dependencies; a constraint-based
algorithm that takes a partial installation
specification and computes a full installation plan;
and a runtime system that co-ordinates the deployment
of the application across multiple machines and manages
the deployed system. By explicitly modeling
configuration metadata and inter-component
dependencies, Engage enables static checking of
application configurations and automated,
constraint-driven, generation of installation plans
across multiple machines. This reduces the tedious
manual process of application configuration,
installation, and management. We have implemented
Engage and we have used it to successfully host a
number of applications. We describe our experiences in
using Engage to manage a generic platform that hosts
Django applications in the cloud or on premises.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
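
The planning step can be illustrated with a dependency order (a toy:
Engage's planner is constraint-based and also resolves configuration
choices; the component names here are hypothetical):

    from graphlib import TopologicalSorter

    DEPENDS_ON = {
        "django_app": ["gunicorn", "postgres"],
        "gunicorn": ["python"],
        "postgres": [],
        "python": [],
    }

    # Install every component after the components it depends on.
    plan = list(TopologicalSorter(DEPENDS_ON).static_order())
    print(plan)   # e.g. ['postgres', 'python', 'gunicorn', 'django_app']
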
@Article{Perelman:2012:TDC,
author = "Daniel Perelman and Sumit Gulwani and Thomas Ball and
Dan Grossman",
title = "Type-directed completion of partial expressions",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "275--286",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254098",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Modern programming frameworks provide enormous
libraries arranged in complex structures, so much so
that a large part of modern programming is searching
for APIs that surely exist ``somewhere in an unfamiliar
part of the framework.'' We present a novel way of
phrasing a search for an unknown API: the programmer
simply writes an expression leaving holes for the parts
they do not know. We call these expressions partial
expressions. We present an efficient algorithm that
produces likely completions ordered by a ranking scheme
based primarily on the similarity of the types of the
APIs suggested to the types of the known expressions.
This gives a powerful language for both API discovery
and code completion with a small impedance mismatch
from writing code. In an automated experiment on mature
C\# projects, we show our algorithm can place the
intended expression in the top 10 choices over 80\% of
the time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
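
The ranking idea can be sketched with a toy scoring function
(illustrative candidates and scores; the paper's ranking over real
framework APIs is much richer):

    CANDIDATES = [            # name, argument types, return type
        ("Path.GetFileName", ("str",), "str"),
        ("File.ReadAllText", ("str",), "str"),
        ("File.Exists", ("str",), "bool"),
        ("Math.Abs", ("int",), "int"),
    ]

    def complete(arg_types, expected):
        def score(c):
            _, args, ret = c
            return (ret == expected) * 2 + (args == arg_types)
        return [c[0] for c in sorted(CANDIDATES, key=score, reverse=True)]

    # the hole ?: a bool-valued call on a string, as in "if (?(path))"
    print(complete(("str",), "bool")[0])   # -> File.Exists
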
@Article{hunEom:2012:SSJ,
author = "Yong hun Eom and Brian Demsky",
title = "Self-stabilizing {Java}",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "287--298",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254099",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Self-stabilizing programs automatically recover from
state corruption caused by software bugs and other
sources to reach the correct state. A number of
applications are inherently self-stabilizing---such
programs typically overwrite all non-constant data with
new input data. We present a type system and static
analyses that together check whether a program is
self-stabilizing. We combine this with a code
generation strategy that ensures that a program
continues executing long enough to self-stabilize. Our
experience using SJava indicates that (1) SJava
annotations are easy to write once one understands a
program and (2) SJava successfully checked that several
benchmarks were self-stabilizing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chen:2012:TDA,
author = "Yan Chen and Joshua Dunfield and Umut A. Acar",
title = "Type-directed automatic incrementalization",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "299--310",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254100",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Application data often changes slowly or incrementally
over time. Since incremental changes to input often
result in only small changes in output, it is often
feasible to respond to such changes asymptotically more
efficiently than by re-running the whole computation.
Traditionally, realizing such asymptotic efficiency
improvements requires designing problem-specific
algorithms known as dynamic or incremental algorithms,
which are often significantly more complicated than
conventional algorithms to design, analyze, implement,
and use. A long-standing open problem is to develop
techniques that automatically transform conventional
programs so that they correctly and efficiently respond
to incremental changes. In this paper, we describe a
significant step towards solving the problem of
automatic incrementalization: a programming language
and a compiler that can, given a few type annotations
describing what can change over time, compile a
conventional program that assumes its data to be static
(unchanging over time) to an incremental program. Based
on recent advances in self-adjusting computation,
including a theoretical proposal for translating purely
functional programs to self-adjusting programs, we
develop techniques for translating conventional
Standard ML programs to self-adjusting programs. By
extending the Standard ML language, we design a fully
featured programming language with higher-order
features, a module system, and a powerful type system,
and implement a compiler for this language. The
resulting programming language, LML, enables
translating conventional programs decorated with simple
type annotations into incremental programs that can
respond to changes in their data correctly and
efficiently. We evaluate the effectiveness of our
approach by considering a range of benchmarks involving
lists, vectors, and matrices, as well as a ray tracer.
For these benchmarks, our compiler incrementalizes
existing code with only trivial amounts of annotation.
The resulting programs are often asymptotically more
efficient, leading to orders of magnitude speedups in
practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sarkar:2012:SCC,
author = "Susmit Sarkar and Kayvan Memarian and Scott Owens and
Mark Batty and Peter Sewell and Luc Maranget and Jade
Alglave and Derek Williams",
title = "Synchronising {C\slash C++} and {POWER}",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "311--322",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254102",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Shared memory concurrency relies on synchronisation
primitives: compare-and-swap,
load-reserve/store-conditional (aka LL/SC),
language-level mutexes, and so on. In a sequentially
consistent setting, or even in the TSO setting of x86
and Sparc, these have well-understood semantics. But in
the very relaxed settings of IBM\reg{} POWER\reg{},
ARM, or C/C++, it remains surprisingly unclear exactly
what the programmer can depend on. This paper studies
relaxed-memory synchronisation. On the hardware side,
we give a clear semantic characterisation of the
load-reserve/store-conditional primitives as provided
by POWER multiprocessors, for the first time since they
were introduced 20 years ago; we cover their
interaction with relaxed loads, stores, barriers, and
dependencies. Our model, while not officially
sanctioned by the vendor, is validated by extensive
testing, comparing actual implementation behaviour
against an oracle generated from the model, and by
detailed discussion with IBM staff. We believe the ARM
semantics to be similar. On the software side, we prove
sound a proposed compilation scheme of the C/C++
synchronisation constructs to POWER, including C/C++
spinlock mutexes, fences, and read-modify-write
operations, together with the simpler atomic operations
for which soundness is already known from our previous
work; this is a first step in verifying concurrent
algorithms that use load-reserve/store-conditional with
respect to a realistic semantics. We also build
confidence in the C/C++ model in its own terms, fixing
some omissions and contributing to the C standards
committee adoption of the C++11 concurrency model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
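
The load-reserve/store-conditional primitive the paper characterizes
can be modeled abstractly (a toy sequential model, not POWER's actual
semantics: a reservation is lost whenever another store intervenes,
and compare-and-swap retries around that):

    class Cell:
        def __init__(self, v):
            self.v, self.version, self.reserved = v, 0, None

        def load_reserve(self):
            self.reserved = self.version
            return self.v

        def store_conditional(self, new):
            if self.reserved != self.version:   # lost the reservation
                return False
            self.v, self.version = new, self.version + 1
            return True

        def plain_store(self, new):             # breaks reservations
            self.v, self.version = new, self.version + 1

    def cas(cell, old, new):
        while True:
            if cell.load_reserve() != old:
                return False
            if cell.store_conditional(new):
                return True                     # else retry

    c = Cell(5)
    print(cas(c, 5, 6), c.v)   # -> True 6
    print(cas(c, 5, 7), c.v)   # -> False 6
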
@Article{Gazzillo:2012:SPA,
author = "Paul Gazzillo and Robert Grimm",
title = "{SuperC}: parsing all of {C} by taming the
preprocessor",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "323--334",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254103",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "C tools, such as source browsers, bug finders, and
automated refactorings, need to process two languages:
C itself and the preprocessor. The latter improves
expressivity through file includes, macros, and static
conditionals. But it operates only on tokens, making it
hard to even parse both languages. This paper presents
a complete, performant solution to this problem. First,
a configuration-preserving preprocessor resolves
includes and macros yet leaves static conditionals
intact, thus preserving a program's variability. To
ensure completeness, we analyze all interactions
between preprocessor features and identify techniques
for correctly handling them. Second, a
configuration-preserving parser generates a well-formed
AST with static choice nodes for conditionals. It forks
new subparsers when encountering static conditionals
and merges them again after the conditionals. To ensure
performance, we present a simple algorithm for
table-driven Fork-Merge LR parsing and four novel
optimizations. We demonstrate the effectiveness of our
approach on the x86 Linux kernel.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
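
Fork-Merge parsing can be pictured with a toy recursive parser
(illustrative only: SuperC is table-driven LR with subparser merging
and several optimizations):

    def parse(tokens, i=0):
        out = []
        while i < len(tokens):
            t = tokens[i]
            if t == "#if":
                then, i = parse(tokens, i + 1)     # fork subparser
                els, i = parse(tokens, i + 1)      # fork after "#else"
                out.append(("choice", then, els))  # merge branches
            elif t in ("#else", "#endif"):
                return out, i
            else:
                out.append(t)
            i += 1
        return out, i

    ast, _ = parse(["int", "#if", "a", "#else", "b", "#endif", ";"])
    print(ast)   # ['int', ('choice', ['a'], ['b']), ';']
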
@Article{Regehr:2012:TCR,
author = "John Regehr and Yang Chen and Pascal Cuoq and Eric
Eide and Chucky Ellison and Xuejun Yang",
title = "Test-case reduction for {C} compiler bugs",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "335--346",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254104",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "To report a compiler bug, one must often find a small
test case that triggers the bug. The existing approach
to automated test-case reduction, delta debugging,
works by removing substrings of the original input; the
result is a concatenation of substrings that delta
cannot remove. We have found this approach less than
ideal for reducing C programs because it typically
yields test cases that are too large or even invalid
(relying on undefined behavior). To obtain small and
valid test cases consistently, we designed and
implemented three new, domain-specific test-case
reducers. The best of these is based on a novel
framework in which a generic fixpoint computation
invokes modular transformations that perform reduction
operations. This reducer produces outputs that are, on
average, more than 25 times smaller than those produced
by our other reducers or by the existing reducer that
is most commonly used by compiler developers. We
conclude that effective program reduction requires more
than straightforward delta debugging.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
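
The fixpoint framework can be sketched generically (a toy with one
transformation and a string-based interestingness test; the real
reducers run the compiler to decide what is interesting):

    def drop_one_line(text):
        # One modular transformation: try removing each line.
        lines = text.splitlines()
        for i in range(len(lines)):
            yield "\n".join(lines[:i] + lines[i + 1:])

    def reduce_test_case(text, interesting, transforms):
        changed = True
        while changed:                 # fixpoint: stop when no
            changed = False            # transformation makes progress
            for transform in transforms:
                for variant in transform(text):
                    if len(variant) < len(text) and interesting(variant):
                        text, changed = variant, True
                        break
        return text

    program = "int a;\nint b;\nint main() { return a / 0; }"
    interesting = lambda p: "/ 0" in p and "main" in p
    print(reduce_test_case(program, interesting, [drop_one_line]))
    # -> int main() { return a / 0; }
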
@Article{Liu:2012:CFE,
author = "Jun Liu and Yuanrui Zhang and Ohyoung Jang and Wei
Ding and Mahmut Kandemir",
title = "A compiler framework for extracting superword level
parallelism",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "347--358",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254106",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "SIMD (single-instruction multiple-data) instruction
set extensions are quite common today in both high
performance and embedded microprocessors, and enable
the exploitation of a specific type of data parallelism
called SLP (Superword Level Parallelism). While prior
research shows that significant performance gains are
possible when SLP is exploited, placing SIMD
instructions in an application code manually can be
very difficult and error prone. In this paper, we
propose a novel automated compiler framework for
improving superword level parallelism exploitation. The
key part of our framework consists of two stages:
superword statement generation and data layout
optimization. The first stage is our main contribution
and has two phases, statement grouping and statement
scheduling, of which the primary goals are to increase
SIMD parallelism and, more importantly, capture more
superword reuses among the superword statements through
global data access and reuse pattern analysis. Further,
as a complementary optimization, our data layout
optimization organizes data in memory space such that
the price of memory operations for SLP is minimized.
The results from our compiler implementation and tests
on two systems indicate performance improvements as
high as 15.2\% over a state-of-the-art SLP optimization
algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Johnson:2012:SSP,
author = "Nick P. Johnson and Hanjun Kim and Prakash Prabhu and
Ayal Zaks and David I. August",
title = "Speculative separation for privatization and
reductions",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "359--370",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254107",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Automatic parallelization is a promising strategy to
improve application performance in the multicore era.
However, common programming practices such as the reuse
of data structures introduce artificial constraints
that obstruct automatic parallelization. Privatization
relieves these constraints by replicating data
structures, thus enabling scalable parallelization.
Prior privatization schemes are limited to arrays and
scalar variables because they are sensitive to the
layout of dynamic data structures. This work presents
Privateer, the first fully automatic privatization
system to handle dynamic and recursive data structures,
even in languages with unrestricted pointers. To reduce
sensitivity to memory layout, Privateer speculatively
separates memory objects. Privateer's lightweight
runtime system validates speculative separation and
speculative privatization to ensure correct parallel
execution. Privateer enables automatic parallelization
of general-purpose C/C++ applications, yielding a
geomean whole-program speedup of 11.4x over best
sequential execution on 24 cores, while non-speculative
parallelization yields only 0.93x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Holewinski:2012:DTB,
author = "Justin Holewinski and Ragavendar Ramamurthi and Mahesh
Ravishankar and Naznin Fauzia and Louis-No{\"e}l
Pouchet and Atanas Rountev and P. Sadayappan",
title = "Dynamic trace-based analysis of vectorization
potential of applications",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "371--382",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254108",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Recent hardware trends with GPUs and the increasing
vector lengths of SSE-like ISA extensions for multicore
CPUs imply that effective exploitation of SIMD
parallelism is critical for achieving high performance
on emerging and future architectures. A vast majority
of existing applications were developed without any
attention by their developers towards effective
vectorizability of the codes. While developers of
production compilers such as GNU gcc, Intel icc, PGI
pgcc, and IBM xlc have invested considerable effort and
made significant advances in enhancing automatic
vectorization capabilities, these compilers still
cannot effectively vectorize many existing scientific
and engineering codes. It is therefore of considerable
interest to analyze existing applications to assess the
inherent latent potential for SIMD parallelism,
exploitable through further compiler advances and/or
via manual code changes. In this paper we develop an
approach to infer a program's SIMD parallelization
potential by analyzing the dynamic data-dependence
graph derived from a sequential execution trace. By
considering only the observed run-time data dependences
for the trace, and by relaxing the execution order of
operations to allow any dependence-preserving
reordering, we can detect potential SIMD parallelism
that may otherwise be missed by more conservative
compile-time analyses. We show that for several
benchmarks our tool discovers regions of code within
computationally-intensive loops that exhibit high
potential for SIMD parallelism but are not vectorized
by state-of-the-art compilers. We present several case
studies of the use of the tool, both in identifying
opportunities to enhance the transformation
capabilities of vectorizing compilers, as well as in
pointing to code regions to manually modify in order to
enable auto-vectorization and performance improvement
by existing compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
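
The core measurement can be sketched directly (illustrative encoding
of a trace as explicit dependence edges): the ratio of total dynamic
operations to the dependence-graph critical path bounds the SIMD
parallelism available under any dependence-preserving reordering.

    def parallelism(trace):
        # trace: list of (op_id, [op_ids this op depends on])
        depth = {}
        for op, deps in trace:
            depth[op] = 1 + max((depth[d] for d in deps), default=0)
        return len(trace) / max(depth.values())

    # a[i] = b[i] + c[i], i = 0..3: four independent adds
    independent = [(i, []) for i in range(4)]
    # s += a[i], i = 0..3: a sequential reduction chain
    chain = [(i, [i - 1] if i > 0 else []) for i in range(4)]

    print(parallelism(independent))   # -> 4.0, fully vectorizable
    print(parallelism(chain))         # -> 1.0, no SIMD parallelism
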
@Article{Leung:2012:VGK,
author = "Alan Leung and Manish Gupta and Yuvraj Agarwal and
Rajesh Gupta and Ranjit Jhala and Sorin Lerner",
title = "Verifying {GPU} kernels by test amplification",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "383--394",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254110",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "We present a novel technique for verifying properties
of data parallel GPU programs via test amplification.
The key insight behind our work is that we can use the
technique of static information flow to amplify the
result of a single test execution over the set of all
inputs and interleavings that affect the property being
verified. We empirically demonstrate the effectiveness
of test amplification for verifying race-freedom and
determinism over a large number of standard GPU
kernels, by showing that the result of verifying a
single dynamic execution can be amplified over the
massive space of possible data inputs and thread
interleavings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Morrisett:2012:RBF,
author = "Greg Morrisett and Gang Tan and Joseph Tassarotti and
Jean-Baptiste Tristan and Edward Gan",
title = "{RockSalt}: better, faster, stronger {SFI} for the
x86",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "395--404",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254111",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Software-based fault isolation (SFI), as used in
Google's Native Client (NaCl), relies upon a
conceptually simple machine-code analysis to enforce a
security policy. But for complicated architectures such
as the x86, it is all too easy to get the details of
the analysis wrong. We have built a new checker that is
smaller, faster, and has a much reduced trusted
computing base when compared to Google's original
analysis. The key to our approach is automatically
generating the bulk of the analysis from a declarative
description which we relate to a formal model of a
subset of the x86 instruction set architecture. The x86
model, developed in Coq, is of independent interest and
should be usable for a wide range of machine-level
verification tasks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
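
The policy such checkers enforce can be pictured with classic
address masking (a sketch of NaCl-style sandboxing, not RockSalt's
Coq model; the region constants are illustrative):

    SANDBOX_BASE = 0x10000000     # sandbox is [BASE, BASE + MASK]
    SANDBOX_MASK = 0x0FFFFFFF

    def sandbox(addr):
        # Masking forces every address into the sandbox region.
        return SANDBOX_BASE | (addr & SANDBOX_MASK)

    def checked_store(mem, addr, val):
        a = sandbox(addr)
        assert SANDBOX_BASE <= a <= SANDBOX_BASE + SANDBOX_MASK
        mem[a] = val

    mem = {}
    checked_store(mem, 0xDEADBEEF, 42)   # wild address, tamed
    print(hex(min(mem)))                 # 0x1eadbeef, in bounds
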
@Article{Grebenshchikov:2012:SSV,
author = "Sergey Grebenshchikov and Nuno P. Lopes and Corneliu
Popeea and Andrey Rybalchenko",
title = "Synthesizing software verifiers from proof rules",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "405--416",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254112",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Automatically generated tools can significantly
improve programmer productivity. For example, parsers
and dataflow analyzers can be automatically generated
from declarative specifications in the form of
grammars, which tremendously simplifies the task of
implementing a compiler. In this paper, we present a
method for the automatic synthesis of software
verification tools. Our synthesis procedure takes as
input a description of the employed proof rule, e.g.,
program safety checking via inductive invariants, and
produces a tool that automatically discovers the
auxiliary assertions required by the proof rule, e.g.,
inductive loop invariants and procedure summaries. We
rely on a (standard) representation of proof rules
using recursive equations over the auxiliary
assertions. The discovery of auxiliary assertions,
i.e., solving the equations, is based on an iterative
process that extrapolates solutions obtained for
finitary unrollings of equations. We show how our
method synthesizes automatic safety and liveness
verifiers for programs with procedures, multi-threaded
programs, and functional programs. Our experimental
comparison of the resulting verifiers with existing
state-of-the-art verification tools confirms the
practicality of the approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hawkins:2012:CDR,
author = "Peter Hawkins and Alex Aiken and Kathleen Fisher and
Martin Rinard and Mooly Sagiv",
title = "Concurrent data representation synthesis",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "417--428",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254114",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "We describe an approach for synthesizing data
representations for concurrent programs. Our compiler
takes as input a program written using concurrent
relations and synthesizes a representation of the
relations as sets of cooperating data structures as
well as the placement and acquisition of locks to
synchronize concurrent access to those data structures.
The resulting code is correct by construction:
individual relational operations are implemented
correctly and the aggregate set of operations is
serializable and deadlock free. The relational
specification also permits a high-level optimizer to
choose the best performing of many possible legal data
representations and locking strategies, which we
demonstrate with an experiment autotuning a graph
benchmark.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liu:2012:DSR,
author = "Feng Liu and Nayden Nedev and Nedyalko Prisadnikov and
Martin Vechev and Eran Yahav",
title = "Dynamic synthesis for relaxed memory models",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "429--440",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254115",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Modern architectures implement relaxed memory models
which may reorder memory operations or execute them
non-atomically. Special instructions called memory
fences are provided, allowing control of this behavior.
To implement a concurrent algorithm for a modern
architecture, the programmer is forced to manually
reason about subtle relaxed behaviors and figure out
ways to control these behaviors by adding fences to the
program. Not only is this process time consuming and
error-prone, but it has to be repeated every time the
implementation is ported to a different architecture.
In this paper, we present the first scalable framework
for handling real-world concurrent algorithms running
on relaxed architectures. Given a concurrent C program,
a safety specification, and a description of the memory
model, our framework tests the program on the memory
model to expose violations of the specification, and
synthesizes a set of necessary ordering constraints
that prevent these violations. The ordering constraints
are then realized as additional fences in the program.
We implemented our approach in a tool called DFence
based on LLVM and used it to infer fences in a number
of concurrent algorithms. Using DFence, we perform the
first in-depth study of the interaction between fences
in real-world concurrent C programs, correctness
criteria such as sequential consistency and
linearizability, and memory models such as TSO and PSO,
yielding many interesting observations. We believe that
this is the first tool that can handle programs at the
scale and complexity of a lock-free memory allocator.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Godefroid:2012:ASS,
author = "Patrice Godefroid and Ankur Taly",
title = "Automated synthesis of symbolic instruction encodings
from {I/O} samples",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "441--452",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254116",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Symbolic execution is a key component of precise
binary program analysis tools. We discuss how to
                 automatically bootstrap the construction of a symbolic
execution engine for a processor instruction set such
as x86, x64 or ARM. We show how to automatically
synthesize symbolic representations of individual
processor instructions from input/output examples and
express them as bit-vector constraints. We present and
compare various synthesis algorithms and instruction
sampling strategies. We introduce a new synthesis
algorithm based on smart sampling which we show is one
to two orders of magnitude faster than previous
synthesis algorithms in our context. With this new
algorithm, we can automatically synthesize bit-vector
circuits for over 500 x86 instructions (8/16/32-bits,
outputs, EFLAGS) using only 6 synthesis templates and
in less than two hours using the Z3 SMT solver on a
regular machine. During this work, we also discovered
several inconsistencies across x86 processors, errors
in the x86 Intel spec, and several bugs in previous
manually-written x86 instruction handlers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Benz:2012:DPA,
author = "Florian Benz and Andreas Hildebrandt and Sebastian
Hack",
title = "A dynamic program analysis to find floating-point
accuracy problems",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "453--462",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254118",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Programs using floating-point arithmetic are prone to
accuracy problems caused by rounding and catastrophic
cancellation. These phenomena provoke bugs that are
notoriously hard to track down: the program does not
necessarily crash and the results are not necessarily
obviously wrong, but often subtly inaccurate. Further
use of these values can lead to catastrophic errors. In
this paper, we present a dynamic program analysis that
supports the programmer in finding accuracy problems.
Our analysis uses binary translation to perform every
floating-point computation side by side in higher
precision. Furthermore, we use a lightweight slicing
approach to track the evolution of errors. We evaluate
our analysis by demonstrating that it catches
well-known floating-point accuracy problems and by
                 analyzing the SPEC CFP2006 floating-point benchmark. In
the latter, we show how our tool tracks down a
catastrophic cancellation that causes a complete loss
of accuracy leading to a meaningless program result.
Finally, we apply our program to a complex, real-world
bioinformatics application in which our program
detected a serious cancellation. Correcting the
instability led not only to improved quality of the
                 result, but also to an improvement of the program's run
                 time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lee:2012:CHP,
author = "Dongyoon Lee and Peter M. Chen and Jason Flinn and
Satish Narayanasamy",
title = "{Chimera}: hybrid program analysis for determinism",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "463--474",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254119",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Chimera uses a new hybrid program analysis to provide
deterministic replay for commodity multiprocessor
systems. Chimera leverages the insight that it is easy
to provide deterministic multiprocessor replay for
data-race-free programs (one can just record
non-deterministic inputs and the order of
synchronization operations), so if we can somehow
transform an arbitrary program to be data-race-free,
then we can provide deterministic replay cheaply for
that program. To perform this transformation, Chimera
uses a sound static data-race detector to find all
potential data-races. It then instruments pairs of
potentially racing instructions with a weak-lock, which
provides sufficient guarantees to allow deterministic
replay but does not guarantee mutual exclusion.
Unsurprisingly, a large fraction of data-races found by
                 the static tool are false data-races, and instrumenting
                 each of them with a weak-lock results in
prohibitively high overhead. Chimera drastically
reduces this cost from 53x to 1.39x by increasing the
granularity of weak-locks without significantly
compromising on parallelism. This is achieved by
employing a combination of profiling and symbolic
analysis techniques that target the sources of
imprecision in the static data-race detector. We find
that performance overhead for deterministic recording
is 2.4\% on average for Apache and desktop applications
and about 86\% for scientific applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{deKruijf:2012:SAC,
author = "Marc A. de Kruijf and Karthikeyan Sankaralingam and
Somesh Jha",
title = "Static analysis and compiler design for idempotent
processing",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "475--486",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254120",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Recovery functionality has many applications in
computing systems, from speculation recovery in modern
microprocessors to fault recovery in high-reliability
systems. Modern systems commonly recover using
checkpoints. However, checkpoints introduce overheads,
add complexity, and often save more state than
necessary. This paper develops a novel compiler
technique to recover program state without the
overheads of explicit checkpoints. The technique breaks
                 programs into idempotent regions---regions that can be
freely re-executed---which allows recovery without
checkpointed state. Leveraging the property of
idempotence, recovery can be obtained by simple
re-execution. We develop static analysis techniques to
construct these regions and demonstrate low overheads
and large region sizes for an LLVM-based
implementation. Across a set of diverse benchmark
suites, we construct idempotent regions close in size
to those that could be obtained with perfect runtime
information. Although the resulting code runs more
slowly, typical performance overheads are in the range
of just 2-12\%. The paradigm of executing entire
programs as a series of idempotent regions we call
idempotent processing, and it has many applications in
computer systems. As a concrete example, we demonstrate
it applied to the problem of compiler-automated
hardware fault recovery. In comparison to two other
state-of-the-art techniques, redundant execution and
checkpoint-logging, our idempotent processing technique
outperforms both by over 15\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Feng:2012:EPL,
author = "Min Feng and Rajiv Gupta and Iulian Neamtiu",
title = "Effective parallelization of loops in the presence of
{I/O} operations",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "487--498",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254122",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Software-based thread-level parallelization has been
widely studied for exploiting data parallelism in
purely computational loops to improve program
performance on multiprocessors. However, none of the
previous efforts deal with efficient parallelization of
hybrid loops, i.e., loops that contain a mix of
computation and I/O operations. In this paper, we
propose a set of techniques for efficiently
parallelizing hybrid loops. Our techniques apply DOALL
parallelism to hybrid loops by breaking the
cross-iteration dependences caused by I/O operations.
We also support speculative execution of I/O operations
to enable speculative parallelization of hybrid loops.
Helper threading is used to reduce the I/O bus
contention caused by the improved parallelism. We
provide an easy-to-use programming model for exploiting
parallelism in loops with I/O operations. Parallelizing
hybrid loops using our model requires few modifications
to the code. We have developed a prototype
implementation of our programming model. We have
evaluated our implementation on a 24-core machine using
eight applications, including a widely-used genomic
sequence assembler and a multi-player game server, and
others from PARSEC and SPEC CPU2000 benchmark suites.
The hybrid loops in these applications take 23\%-99\%
of the total execution time on our 24-core machine. The
parallelized applications achieve speedups of
3.0x-12.8x with hybrid loop parallelization over the
sequential versions of the same applications. Compared
to the versions of applications where only computation
loops are parallelized, hybrid loop parallelization
improves the application performance by 68\% on
average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chen:2012:PSR,
author = "Chun Chen",
title = "Polyhedra scanning revisited",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "499--508",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254123",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "This paper presents a new polyhedra scanning system
called CodeGen+ to address the challenge of generating
high-performance code for complex iteration spaces
resulting from compiler optimization and autotuning
systems. The strength of our approach lies in two new
algorithms. First, a loop overhead removal algorithm
provides precise control of trade-offs between loop
overhead and code size based on actual loop nesting
depth. Second, an if-statement simplification algorithm
further reduces the number of comparisons in the code.
These algorithms combined with the expressive power of
Presburger arithmetic enable CodeGen+ to support
complex optimization strategies expressed in iteration
spaces. We compare with the state-of-the-art polyhedra
scanning tool CLooG on five loop nest computations,
demonstrating that CodeGen+ generates code that is
simpler and up to 1.15x faster.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Oancea:2012:LIT,
author = "Cosmin E. Oancea and Lawrence Rauchwerger",
title = "Logical inference techniques for loop
parallelization",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "509--520",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254124",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "This paper presents a fully automatic approach to loop
parallelization that integrates the use of static and
run-time analysis and thus overcomes many known
difficulties such as nonlinear and indirect array
indexing and complex control flow. Our hybrid analysis
framework validates the parallelization transformation
by verifying the independence of the loop's memory
references. To this end it represents array references
using the USR (uniform set representation) language and
                 expresses the independence condition as an equation,
                 $S = 0$, where $S$ is a set expression representing array
indexes. Using a language instead of an
array-abstraction representation for S results in a
smaller number of conservative approximations but
                 exhibits a potentially high runtime cost. To alleviate
this cost we introduce a language translation F from
the USR set-expression language to an equally rich
language of predicates ($F(S) \implies S = 0$). Loop
parallelization is then validated using a novel logic
inference algorithm that factorizes the obtained
                 complex predicates ($F(S)$) into a sequence of
sufficient independence conditions that are evaluated
first statically and, when needed, dynamically, in
increasing order of their estimated complexities. We
evaluate our automated solution on 26 benchmarks from
PERFECT-Club and SPEC suites and show that our approach
is effective in parallelizing large, complex loops and
obtains much better full program speedups than the
Intel and IBM Fortran compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pradel:2012:FAP,
author = "Michael Pradel and Thomas R. Gross",
title = "Fully automatic and precise detection of thread safety
violations",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "521--530",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254126",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Concurrent, object-oriented programs often use
thread-safe library classes. Existing techniques for
testing a thread-safe class either rely on tests using
the class, on formal specifications, or on both.
Unfortunately, these techniques often are not fully
automatic as they involve the user in analyzing the
output. This paper presents an automatic testing
technique that reveals concurrency bugs in supposedly
thread-safe classes. The analysis requires as input
only the class under test and reports only true
positives. The key idea is to generate tests in which
multiple threads call methods on a shared instance of
the tested class. If a concurrent test exhibits an
exception or a deadlock that cannot be triggered in any
linearized execution of the test, the analysis reports
a thread safety violation. The approach is easily
applicable, because it is independent of hand-written
tests and explicit specifications. The analysis finds
15 concurrency bugs in popular Java libraries,
including two previously unknown bugs in the Java
standard library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Raman:2012:SPD,
author = "Raghavan Raman and Jisheng Zhao and Vivek Sarkar and
Martin Vechev and Eran Yahav",
title = "Scalable and precise dynamic datarace detection for
structured parallelism",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "531--542",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254127",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Existing dynamic race detectors suffer from at least
one of the following three limitations: (i) space
overhead per memory location grows linearly with the
number of parallel threads [13], severely limiting the
parallelism that the algorithm can handle; (ii)
sequentialization: the parallel program must be
processed in a sequential order, usually depth-first
[12, 24]. This prevents the analysis from scaling with
available hardware parallelism, inherently limiting its
performance; (iii) inefficiency: even though race
detectors with good theoretical complexity exist, they
do not admit efficient implementations and are
unsuitable for practical use [4, 18]. We present a new
precise dynamic race detector that leverages structured
parallelism in order to address these limitations. Our
algorithm requires constant space per memory location,
works in parallel, and is efficient in practice. We
implemented and evaluated our algorithm on a set of 15
benchmarks. Our experimental results indicate an
average (geometric mean) slowdown of 2.78x on a 16-core
SMP system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Nagarakatte:2012:MAP,
author = "Santosh Nagarakatte and Sebastian Burckhardt and Milo
M. K. Martin and Madanlal Musuvathi",
title = "Multicore acceleration of priority-based schedulers
for concurrency bug detection",
journal = j-SIGPLAN,
volume = "47",
number = "6",
pages = "543--554",
month = jun,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2345156.2254128",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 6 16:31:49 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PLDI '12 proceedings.",
abstract = "Testing multithreaded programs is difficult as threads
can interleave in a nondeterministic fashion. Untested
interleavings can cause failures, but testing all
interleavings is infeasible. Many interleaving
exploration strategies for bug detection have been
proposed, but their relative effectiveness and
performance remains unclear as they often lack publicly
available implementations and have not been evaluated
using common benchmarks. We describe NeedlePoint, an
open-source framework that allows selection and
comparison of a wide range of interleaving exploration
policies for bug detection proposed by prior work. Our
experience with NeedlePoint indicates that
priority-based probabilistic concurrency testing (the
PCT algorithm) finds bugs quickly, but it runs only one
thread at a time, which destroys parallelism by
serializing executions. To address this problem we
propose a parallel version of the PCT algorithm (PPCT).
We show that the new algorithm outperforms the original
by a factor of 5x when testing parallel programs on an
eight-core machine. We formally prove that parallel PCT
provides the same probabilistic coverage guarantees as
PCT. Moreover, PPCT is the first algorithm that runs
multiple threads while providing coverage guarantees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Nieh:2012:CBR,
author = "Jason Nieh",
title = "Challenges in building a real, large private cloud",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "1--2",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151026",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Virtualization and internal cloud are often touted as
the solution to many challenging problems, from
resource underutilization to data-center optimization
and carbon emission reduction. However, the hidden
costs of cloud-scale virtualization, largely stemming
from the complex and difficult system administration
challenges it poses, are often overlooked. Reaping the
fruits of internal Infrastructure as a Service cloud
requires the enterprise to navigate scalability
limitations, revamp traditional operational practices,
manage performance, and achieve unprecedented
cross-silo collaboration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kundu:2012:MVA,
author = "Sajib Kundu and Raju Rangaswami and Ajay Gulati and
Ming Zhao and Kaushik Dutta",
title = "Modeling virtualized applications using machine
learning techniques",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "3--14",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151028",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "With the growing adoption of virtualized datacenters
and cloud hosting services, the allocation and sizing
of resources such as CPU, memory, and I/O bandwidth for
virtual machines (VMs) is becoming increasingly
important. Accurate performance modeling of an
application would help users in better VM sizing, thus
reducing costs. It can also benefit cloud service
providers who can offer a new charging model based on
the VMs' performance instead of their configured sizes.
In this paper, we present techniques to model the
performance of a VM-hosted application as a function of
the resources allocated to the VM and the resource
contention it experiences. To address this
multi-dimensional modeling problem, we propose and
refine the use of two machine learning techniques:
artificial neural network (ANN) and support vector
machine (SVM). We evaluate these modeling techniques
using five virtualized applications from the RUBiS and
Filebench suite of benchmarks and demonstrate that
their median and 90th percentile prediction errors are
within 4.36\% and 29.17\% respectively. These results
are substantially better than regression based
approaches as well as direct applications of machine
learning techniques without our refinements. We also
present a simple and effective approach to VM sizing
and empirically demonstrate that it can deliver optimal
results for 65\% of the sizing problems that we studied
and produces close-to-optimal sizes for the remaining
35\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lv:2012:VCV,
author = "Hui Lv and Yaozu Dong and Jiangang Duan and Kevin
Tian",
title = "Virtualization challenges: a view from server
consolidation perspective",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "15--26",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151030",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Server consolidation, by running multiple virtual
machines on top of a single platform with
                 virtualization, provides an efficient way to exploit
                 the parallelism and utilization of modern multi-core
                 processor systems. However, the performance and
                 scalability of server consolidation solutions on
                 modern large-scale servers are not well addressed. In this
paper, we conduct a comprehensive study of Xen
performance and scalability characterization running
                 SPECvirt\_sc2010, and identify that a large memory and
                 cache footprint, caused by unnecessarily frequent
                 context switches, introduces additional challenges to
                 system performance and scalability. We propose two
optimizations (dynamically-allocable tasklets and
context-switch rate controller) to improve the
performance. The results show the improved memory and
cache efficiency with a reduction of the overall CPI,
resulting in an improvement of server consolidation
capability by 15\% in SPECvirt\_sc2010. In the
                 meantime, our optimization achieves up to a 50\%
acceleration of service response, which greatly
                 improves the QoS of the Xen virtualization solution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wang:2012:RCV,
author = "Wei Wang and Tanima Dey and Ryan W. Moore and Mahmut
Aktasoglu and Bruce R. Childers and Jack W. Davidson
and Mary Jane Irwin and Mahmut Kandemir and Mary Lou
Soffa",
title = "{REEact}: a customizable virtual execution manager for
multicore platforms",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "27--38",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151031",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "With the shift to many-core chip multiprocessors
(CMPs), a critical issue is how to effectively
coordinate and manage the execution of applications and
hardware resources to overcome performance, power
consumption, and reliability challenges stemming from
hardware and application variations inherent in this
new computing environment. Effective resource and
application management on CMPs requires consideration
of user/application/hardware-specific requirements and
dynamic adaption of management decisions based on the
actual run-time environment. However, designing an
algorithm to manage resources and applications that can
dynamically adapt based on the run-time environment is
difficult because most resource and application
management and monitoring facilities are only available
at the operating system level. This paper presents
REEact, an infrastructure that provides the capability
to specify user-level management policies with dynamic
adaptation. REEact is a virtual execution environment
that provides a framework and core services to quickly
enable the design of custom management policies for
dynamically managing resources and applications. To
demonstrate the capabilities and usefulness of REEact,
this paper describes three case studies--each
illustrating the use of REEact to apply a specific
dynamic management policy on a real CMP. Through these
case studies, we demonstrate that REEact can
effectively and efficiently implement policies to
dynamically manage resources and adapt application
execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ma:2012:DTD,
author = "Zhiqiang Ma and Zhonghua Sheng and Lin Gu and Liufei
Wen and Gong Zhang",
title = "{DVM}: towards a datacenter-scale virtual machine",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "39--50",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151032",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "As cloud-based computation becomes increasingly
important, providing a general computational interface
to support datacenter-scale programming has become an
imperative research agenda. Many cloud systems use
existing virtual machine monitor (VMM) technologies,
such as Xen, VMware, and Windows Hypervisor, to
multiplex a physical host into multiple virtual hosts
and isolate computation on the shared cluster platform.
However, traditional multiplexing VMMs do not scale
                 beyond a single physical host, and alone they cannot
provide the programming interface and cluster-wide
computation that a datacenter system requires. We
design a new instruction set architecture, DISA, to
unify myriads of compute nodes to form a big virtual
machine called DVM, and present programmers the view of
a single computer where thousands of tasks run
concurrently in a large, unified, and snapshotted
memory space. The DVM provides a simple yet scalable
programming model and mitigates the scalability
bottleneck of traditional distributed shared memory
systems. Along with an efficient execution engine, the
capacity of a DVM can scale up to support large
clusters. We have implemented and tested DVM on three
platforms, and our evaluation shows that DVM has
excellent performance in terms of execution time and
speedup. On one physical host, the system overhead of
DVM is comparable to that of traditional VMMs. On 16
physical hosts, the DVM runs 10 times faster than
MapReduce/Hadoop and X10. On 256 EC2 instances, DVM
shows linear speedup on a parallelizable workload.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yu:2012:SCO,
author = "Tingting Yu and Witawas Srisa-an and Gregg Rothermel",
title = "{SimTester}: a controllable and observable testing
framework for embedded systems",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "51--62",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151034",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "In software for embedded systems, the frequent use of
interrupts for timing, sensing, and I/O processing can
cause concurrency faults to occur due to interactions
between applications, device drivers, and interrupt
handlers. This type of fault is considered by many
practitioners to be among the most difficult to detect,
isolate, and correct, in part because it can be
sensitive to execution interleavings and often occurs
without leaving any observable incorrect output. As
such, commonly used testing techniques that inspect
program outputs to detect failures are often
ineffective at detecting them. To test for these
concurrency faults, test engineers need to be able to
control interleavings so that they are deterministic.
Furthermore, they also need to be able to observe
faults as they occur instead of relying on observable
incorrect outputs. In this paper, we introduce
SimTester, a framework that allows engineers to
effectively test for subtle and non-deterministic
concurrency faults by providing them with greater
controllability and observability. We implemented our
framework on a commercial virtual platform that is
                 widely used to support hardware/software co-design, to
promote ease of adoption. We then evaluated its
effectiveness by using it to test for data races and
deadlocks. The result shows that our framework can be
effective and efficient at detecting these faults.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhang:2012:SRB,
author = "Yuan Zhang and Min Yang and Bo Zhou and Zhemin Yang
and Weihua Zhang and Binyu Zang",
title = "{Swift}: a register-based {JIT} compiler for embedded
{JVMs}",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "63--74",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151035",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Code quality and compilation speed are two challenges
to JIT compilers, while selective compilation is
commonly used to trade-off these two issues. Meanwhile,
with more and more Java applications running in mobile
                 devices, selective compilation runs into new problems:
                 these applications typically have flat execution
                 profiles and short lifetimes, so a lightweight JIT
                 technique that does not sacrifice code quality is
                 sorely needed. However, the overhead of compiling stack-based
Java bytecode to heterogeneous register-based machine
code is significant in embedded devices. This paper
presents a fast and effective JIT technique for mobile
devices, building on a register-based Java bytecode
format which is more similar to the underlying machine
architecture. Through a comprehensive study on the
characteristics of Java applications, we observe that
                 virtual registers used by more than 90\% of Java methods
can be directly fulfilled by 11 physical registers.
Based on this observation, this paper proposes Swift, a
novel JIT compiler on register-based bytecode, which
generates native code for RISC machines. After mapping
virtual registers to physical registers, the code is
generated efficiently by looking up a translation
table. And the code quality is guaranteed by the static
compiler which is used to generate register-based
bytecode. Besides, we design two lightweight
optimizations and an efficient code unloader to make
                 Swift more suitable for embedded environments. Given the
                 prevalence of Android, a prototype of Swift is
implemented upon DEX bytecode which is the official
distribution format of Android applications. Swift is
evaluated with three benchmarks (SPECjvm98,
EmbeddedCaffeineMark3 and JemBench2) on two different
ARM SOCs: S3C6410 (armv6) and OMAP3530 (armv7). The
results show that Swift achieves a speedup of 3.13 over
the best-performing interpreter on the selected
benchmarks. Compared with the state-of-the-art JIT
compiler in Android, JITC-Droid, Swift achieves a
speedup of 1.42.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shan:2012:FIA,
author = "Zhiyong Shan and Xin Wang and Tzi-cker Chiueh and
Xiaofeng Meng",
title = "Facilitating inter-application interactions for
{OS}-level virtualization",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "75--86",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151036",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "OS-level virtualization generates a minimal start-up
and run-time overhead on the host OS and thus suits
applications that require both good isolation and high
                 efficiency. However, the member applications that
                 together form a system may occasionally need to
                 communicate across this isolation barrier to cooperate
                 with each other while they are separated in different
                 VMs to isolate intrusions or faults. Such application
scenarios are often critical to enterprise-class
servers, HPC clusters and intrusion/fault-tolerant
systems, etc. We make the first effort to support the
inter-application interactions in an OS-level
virtualization system without causing a significant
compromise on VM isolation. We identify all interactive
operations that impact inter-application interactions,
including inter-process communications, application
invocations, resource name transfers and application
dependencies. We propose Shuttle, a novel approach for
facilitating inter-application interactions within and
across OS-level virtual machines. Our results
demonstrate that Shuttle can correctly address all
necessary inter-application interactions while
providing good isolation capability to all sample
applications on different versions of Windows OS.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gerofi:2012:ETT,
author = "Balazs Gerofi and Yutaka Ishikawa",
title = "Enhancing {TCP} throughput of highly available virtual
machines via speculative communication",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "87--96",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151038",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Checkpoint-recovery based virtual machine (VM)
replication is an attractive technique for
accommodating VM installations with high-availability.
It provides seamless failover for the entire software
                 stack executed in the VM regardless of the application or
the underlying operating system (OS), it runs on
commodity hardware, and it is inherently capable of
dealing with shared memory non-determinism of symmetric
multiprocessing (SMP) configurations. There have been
several studies aiming at alleviating the overhead of
replication, however, due to consistency requirements,
network performance of the basic replication mechanism
                 remains extremely poor. In this paper we revisit the
replication protocol and extend it with speculative
communication. Speculative communication silently
acknowledges TCP packets of the VM, enabling the
guest's TCP stack to progress with transmission without
exposing the messages to the clients before the
corresponding execution state is checkpointed to the
                 backup host. Furthermore, we propose replication-aware
congestion control, an extension to the guest's TCP
stack that aggressively fills up the VMM's replication
buffer so that speculative packets can be backed up and
released earlier to the clients. We observe up to an
order of magnitude improvement in bulk data transfer
with speculative communication, and close to native VM
network performance when replication awareness is
enabled in the guest OS. We provide results of micro-,
as well as application-level benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Rajagopalan:2012:SDT,
author = "Shriram Rajagopalan and Brendan Cully and Ryan
O'Connor and Andrew Warfield",
title = "{SecondSite}: disaster tolerance as a service",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "97--108",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151039",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "This paper describes the design and implementation of
SecondSite, a cloud-based service for disaster
tolerance. SecondSite extends the Remus
virtualization-based high availability system by
allowing groups of virtual machines to be replicated
across data centers over wide-area Internet links. The
goal of the system is to commodify the property of
availability, exposing it as a simple tick box when
configuring a new virtual machine. To achieve this in
the wide area, we have had to tackle the related issues
of replication traffic bandwidth, reliable failure
detection across geographic regions and traffic
redirection over a wide-area network without
compromising on transparency and consistency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pan:2012:CLM,
author = "Zhenhao Pan and Yaozu Dong and Yu Chen and Lei Zhang
and Zhijiao Zhang",
title = "{CompSC}: live migration with pass-through devices",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "109--120",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151040",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Live migration is one of the most important features
of virtualization technology. With regard to recent
virtualization techniques, performance of network I/O
is critical. Current network I/O virtualization (e.g.
Para-virtualized I/O, VMDq) has a significant
performance gap with native network I/O. Pass-through
network devices have near native performance, however,
they have thus far prevented live migration. No
existing methods solve the problem of live migration
with pass-through devices perfectly. In this paper, we
propose CompSC: a solution of hardware state migration
that will enable the live migration support of
pass-through devices. We go on to apply CompSC to
SR-IOV network interface controllers. We discuss the
attributes of different hardware states in pass-through
devices and migrate them with corresponding techniques.
Our experiments show that CompSC enables live migration
on an Intel 82599 VF with a throughput 282.66\% higher
                 than that of para-virtualized devices. In addition, service
                 downtime during live migration is 42.9\% less than
                 with para-virtualized devices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kemerlis:2012:LPD,
author = "Vasileios P. Kemerlis and Georgios Portokalidis and
Kangkook Jee and Angelos D. Keromytis",
title = "{{\tt libdft}}: practical dynamic data flow tracking
for commodity systems",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "121--132",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151042",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Dynamic data flow tracking (DFT) deals with tagging
and tracking data of interest as they propagate during
program execution. DFT has been repeatedly implemented
by a variety of tools for numerous purposes, including
protection from zero-day and cross-site scripting
attacks, detection and prevention of information leaks,
and for the analysis of legitimate and malicious
software. We present {\tt libdft}, a dynamic DFT
framework that unlike previous work is at once fast,
reusable, and works with commodity software and
hardware. {\tt libdft} provides an API for building
DFT-enabled tools that work on unmodified binaries,
running on common operating systems and hardware, thus
facilitating research and rapid prototyping. We explore
different approaches for implementing the low-level
aspects of instruction-level data tracking, introduce a
more efficient and 64-bit capable shadow memory, and
identify (and avoid) the common pitfalls responsible
for the excessive performance overhead of previous
studies. We evaluate {\tt libdft} using real
applications with large codebases like the Apache and
MySQL servers, and the Firefox web browser. We also use
a series of benchmarks and utilities to compare {\tt
libdft} with similar systems. Our results indicate that
it performs at least as fast, if not faster, than
previous solutions, and to the best of our knowledge,
we are the first to evaluate the performance overhead
of a fast dynamic DFT implementation in such depth.
Finally, {\tt libdft} is freely available as open
source software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bruening:2012:TDI,
author = "Derek Bruening and Qin Zhao and Saman Amarasinghe",
title = "Transparent dynamic instrumentation",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "133--144",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151043",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Process virtualization provides a virtual execution
environment within which an unmodified application can
be monitored and controlled while it executes. The
provided layer of control can be used for purposes
ranging from sandboxing to compatibility to profiling.
The additional operations required for this layer are
performed clandestinely alongside regular program
execution. Software dynamic instrumentation is one
method for implementing process virtualization which
dynamically instruments an application such that the
application's code and the inserted code are
interleaved together. DynamoRIO is a process
virtualization system implemented using software code
cache techniques that allows users to build customized
dynamic instrumentation tools. There are many
challenges to building such a runtime system. One major
obstacle is transparency. In order to support executing
arbitrary applications, DynamoRIO must be fully
transparent so that an application cannot distinguish
between running inside the virtual environment and
native execution. In addition, any desired extra
operations for a particular tool must avoid interfering
with the behavior of the application. Transparency has
historically been provided on an ad-hoc basis, as a
reaction to observed problems in target applications.
This paper identifies a necessary set of transparency
requirements for running mainstream Windows and Linux
applications. We discuss possible solutions to each
transparency issue, evaluate tradeoffs between
different choices, and identify cases where maintaining
transparency is not practically solvable. We believe
this will provide a guideline for better design and
implementation of transparent dynamic instrumentation,
as well as other similar process virtualization systems
using software code caches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lefebvre:2012:EM,
author = "Geoffrey Lefebvre and Brendan Cully and Christopher
Head and Mark Spear and Norm Hutchinson and Mike Feeley
and Andrew Warfield",
title = "Execution mining",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "145--158",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151044",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Operating systems represent large pieces of complex
software that are carefully tested and broadly
deployed. Despite this, developers frequently have
little more than their source code to understand how
they behave. This static representation of a system
results in limited insight into execution dynamics,
such as what code is important, how data flows through
a system, or how threads interact with one another. We
describe Tralfamadore, a system that preserves complete
traces of machine execution as an artifact that can be
queried and analyzed with a library of simple, reusable
operators, making it easy to develop and run new
dynamic analyses. We demonstrate the benefits of this
approach with several example applications, including a
novel unified source and execution browser.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pavlou:2012:DBD,
author = "Demos Pavlou and Enric Gibert and Fernando Latorre and
Antonio Gonzalez",
title = "{DDGacc}: boosting dynamic {DDG}-based binary
optimizations through specialized hardware support",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "159--168",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151046",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Dynamic Binary Translators (DBT) and Dynamic Binary
Optimization (DBO) by software are used widely for
several reasons including performance, design
simplification and virtualization. However, the
software layer in such systems introduces
non-negligible overheads which affect performance and
user experience. Hence, reducing DBT/DBO overheads is
of paramount importance. In addition, reduced overheads
have interesting collateral effects in the rest of the
software layer, such as allowing optimizations to be
applied earlier. A cost-effective solution to this
problem is to provide hardware support to speed up the
primitives of the software layer, paying special
attention to automate DBT/DBO mechanisms and leave the
heuristics to the software, which is more flexible. In
this work, we have characterized the overheads of a DBO
system using DynamoRIO implementing several basic
optimizations. We have seen that the computation of the
Data Dependence Graph (DDG) accounts for 5\%-10\% of
the execution time. For this reason, we propose to add
hardware support for this task in the form of a new
functional unit, called DDGacc, which is integrated in
a conventional pipeline processor and is operated
through new ISA instructions. Our evaluation shows that
DDGacc reduces the cost of computing the DDG by 32x,
which reduces overall execution time by 5\%-10\% on
average and up to 18\% for applications where the DBO
optimizes large code footprints.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ishizaki:2012:ADT,
author = "Kazuaki Ishizaki and Takeshi Ogasawara and Jose
Castanos and Priya Nagpurkar and David Edelsohn and
Toshio Nakatani",
title = "Adding dynamically-typed language support to a
statically-typed language compiler: performance
evaluation, analysis, and tradeoffs",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "169--180",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151047",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Applications written in dynamically typed scripting
languages are increasingly popular for Web software
development. Even on the server side, programmers are
using dynamically typed scripting languages such as
Ruby and Python to build complex applications quickly.
As the number and complexity of dynamically typed
scripting language applications grows, optimizing their
performance is becoming important. Some of the best
performing compilers and optimizers for dynamically
typed scripting languages are developed entirely from
scratch and target a specific language. This approach
is not scalable, given the variety of dynamically typed
scripting languages, and the effort involved in
developing and maintaining separate infrastructures for
each. In this paper, we evaluate the feasibility of
adapting and extending an existing production-quality
method-based Just-In-Time (JIT) compiler for a language
with dynamic types. Our goal is to identify the
challenges and shortcomings with the current
infrastructure, and to propose and evaluate runtime
techniques and optimizations that can be incorporated
into a common optimization infrastructure for static
and dynamic languages. We discuss three extensions to
the compiler to support dynamically typed languages:
(1) simplification of control flow graphs, (2) mapping
of memory locations to stack-allocated variables, and
(3) reduction of runtime overhead using language
semantics. We also propose four new optimizations for
Python in (2) and (3). These extensions are effective
in reducing compiler working memory and improving
runtime performance. We present a detailed
performance evaluation of our approach for Python,
finding an overall improvement of 1.69x on average (up
to 2.74x) over our JIT compiler without any
optimization for dynamically typed languages and
Python.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lin:2012:UKT,
author = "Yi Lin and Stephen M. Blackburn and Daniel Frampton",
title = "Unpicking the knot: teasing apart {VM}\slash
application interdependencies",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "181--190",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151048",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Flexible and efficient runtime design requires an
understanding of the dependencies among the components
internal to the runtime and those between the
application and the runtime. These dependencies are
frequently unclear. This problem exists in all runtime
design, and is most vivid in a metacircular runtime ---
one that is implemented in terms of itself.
Metacircularity blurs boundaries between application
and runtime implementation, making it harder to
understand and make guarantees about overall system
behavior, affecting isolation, security, and resource
management, as well as reducing opportunities for
optimization. Our goal is to shed new light on VM
interdependencies, helping all VM designers understand
these dependencies and thereby engineer better
runtimes. We explore these issues in the context of a
high-performance Java-in-Java virtual machine. Our
approach is to identify and instrument transition
points into and within the runtime, which allows us to
establish a dynamic execution context. Our
contributions are: (1) implementing and measuring a
system that dynamically maintains execution context
with very low overhead, (2) demonstrating that such a
framework can be used to improve the software
engineering of an existing runtime, and (3) analyzing
the behavior and runtime characteristics of our runtime
across a wide range of benchmarks. Our solution
provides clarity about execution state and allowable
transitions, making it easier to develop, debug, and
understand managed runtimes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
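
A minimal sketch of the transition-point instrumentation the abstract
describes: tag entries into and exits from runtime services with a
thread-local context so any sample or event can be attributed to the VM
or to the application. All names here are hypothetical; this is not the
paper's API.

    enum ctx { CTX_APP, CTX_GC, CTX_JIT };      /* hypothetical contexts */

    static _Thread_local enum ctx current_ctx = CTX_APP;   /* C11 */

    /* A transition point saves the caller's context and switches; the
       matching exit restores it, so nesting (app -> JIT -> GC) works. */
    static enum ctx ctx_enter(enum ctx c)
    {
        enum ctx old = current_ctx;
        current_ctx = c;
        return old;
    }

    static void ctx_leave(enum ctx old) { current_ctx = old; }

    void collect_garbage(void)
    {
        enum ctx saved = ctx_enter(CTX_GC);
        /* ... collector work is now attributed to CTX_GC ... */
        ctx_leave(saved);
    }
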
@Article{Tuch:2012:BSV,
author = "Harvey Tuch and Cyprien Laplace and Kenneth C. Barr
and Bi Wu",
title = "Block storage virtualization with commodity secure
digital cards",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "191--202",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151050",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Smartphones, tablets and other mobile platforms
typically accommodate bulk data storage with low-cost,
FAT-formatted Secure Digital cards. When one uses a
mobile device to run a full-system virtual machine
(VM), there can be a mismatch between (1) the VM's I/O
mixture, security and reliability requirements and (2)
the properties of the storage media available for VM
block storage and checkpoint images. To resolve this
mismatch, this paper presents a new VM disk image
format called the Logging Block Store (LBS). After
motivating the need for a new format, LBS is described
in detail with experimental results demonstrating its
efficacy. As a result of this work, recommendations are
made for future optimizations throughout the stack that
may simplify and improve the performance of storage
virtualization systems on mobile platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ghosh:2012:RAA,
author = "Sudeep Ghosh and Jason Hiser and Jack W. Davidson",
title = "Replacement attacks against {VM}-protected
applications",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "203--214",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151051",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Process-level virtualization is increasingly being
used to enhance the security of software applications
from reverse engineering and unauthorized modification
(called software protection). Process-level virtual
machines (PVMs) can safeguard the application code at
run time and hamper the adversary's ability to launch
dynamic attacks on the application. This dynamic
protection, combined with its flexibility, ease in
handling legacy systems and low performance overhead,
has made process-level virtualization a popular
approach for providing software protection. While there
has been much research on using process-level
virtualization to provide such protection, there has
been less research on attacks against PVM-protected
software. In this paper, we describe an attack on
applications protected using process-level
virtualization, called a replacement attack. In a
replacement attack, the adversary replaces the
protecting PVM with an attack VM, thereby rendering the
application vulnerable to analysis and modification. We
present a general description of the replacement attack
methodology and two attack implementations against a
protected application using freely available tools. The
generality and simplicity of replacement attacks
demonstrate that there is a strong need to develop
techniques that meld applications more tightly to the
protecting PVM to prevent such attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Payer:2012:PAA,
author = "Mathias Payer and Thomas R. Gross",
title = "Protecting applications against {TOCTTOU} races by
user-space caching of file metadata",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "215--226",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151052",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Time Of Check To Time Of Use (TOCTTOU) race conditions
for file accesses in user-space applications are a
common problem in Unix-like systems. The mapping
between filename and inode and device is volatile and
can provide the necessary preconditions for an exploit.
Applications use filenames as the primary attribute to
identify files, but the mapping between filenames and
inode and device can be changed by an attacker.
DynaRace is an approach that protects unmodified
applications from file-based TOCTTOU race conditions.
DynaRace uses a transparent mapping cache that keeps
additional state and metadata for each accessed file in
the application. The combination of file state and the
current system call type are used to decide if (i) the
metadata is updated or (ii) the correctness of the
metadata is enforced between consecutive system calls.
DynaRace uses user-mode path resolution internally to
resolve individual file atoms. Each file atom is
verified or updated according to the associated state
in the mapping cache. More specifically, DynaRace
protects against race conditions for all file-based
system calls, by replacing the unsafe system calls with
a set of safe system calls that utilize the mapping
cache. The system call is executed only if the state
transition is allowed and the information in the
mapping cache matches. DynaRace deterministically
solves the problem of file-based race conditions for
unmodified applications and removes an attacker's
ability to exploit the TOCTTOU race condition. DynaRace
detects injected alternate inode and device pairs and
terminates the application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
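
The race the abstract targets lives in the gap between checking a
filename and using it. A common hand-rolled mitigation, sketched below
in C, binds the name to the (device, inode) pair recorded at check
time, the same attribute pair DynaRace caches; this shows only the
general pattern, not DynaRace's mapping cache or its safe system-call
set.

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>

    struct file_id { dev_t dev; ino_t ino; };   /* recorded at check time */

    /* Open, then verify the object actually opened: fstat() inspects the
       open file itself, so the name can no longer be swapped under us. */
    int open_verified(const char *path, int flags, const struct file_id *id)
    {
        int fd = open(path, flags | O_NOFOLLOW);
        if (fd < 0)
            return -1;
        struct stat st;
        if (fstat(fd, &st) != 0 ||
            st.st_dev != id->dev || st.st_ino != id->ino) {
            close(fd);          /* mapping changed between check and use */
            return -1;
        }
        return fd;
    }
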
@Article{Yan:2012:VCH,
author = "Lok-Kwong Yan and Manjukumar Jayachandra and Mu Zhang
and Heng Yin",
title = "{V2E}: combining hardware virtualization and software
emulation for transparent and extensible malware
analysis",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "227--238",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151053",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "A transparent and extensible malware analysis platform
is essential for defeating malware. This platform
should be transparent so malware cannot easily detect
and bypass it. It should also be extensible to provide
strong support for heavyweight instrumentation and
analysis efficiency. However, no existing platform can
meet both requirements. Leveraging hardware
virtualization technology, analysis platforms like
Ether can achieve good transparency, but their
instrumentation support and analysis efficiency are
poor. In contrast, software emulation provides strong
support for code instrumentation and good analysis
efficiency by using dynamic binary translation.
However, analysis platforms based on software emulation
can be easily detected by malware and are thus poor in
transparency. To achieve both transparency and
extensibility, we propose a new analysis platform that
combines hardware virtualization and software
emulation. The essence is precise heterogeneous replay:
the malware execution is recorded via hardware
virtualization and then replayed in software. Our
design ensures the execution replay is precise.
Moreover, with page-level recording granularity, the
platform can easily adjust to analyze various forms of
malware (a process, a kernel module, or a shared
library). We implemented a prototype called V2E and
demonstrated its capability and efficiency by
conducting an extensive evaluation with both synthetic
samples and 14 real-world emulation-resistant malware
samples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Huynh:2012:SFM,
author = "Huynh Phung Huynh and Andrei Hagiescu and Weng-Fai
Wong and Rick Siow Mong Goh",
title = "Scalable framework for mapping streaming applications
onto multi-{GPU} systems",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "1--10",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145818",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Graphics processing units leverage on a large array of
parallel processing cores to boost the performance of a
specific streaming computation pattern frequently found
in graphics applications. Unfortunately, while many
other general purpose applications do exhibit the
required streaming behavior, they also possess
unfavorable data layout and poor
computation-to-communication ratios that penalize any
straight-forward execution on the GPU. In this paper we
describe an efficient and scalable code generation
framework that can map general purpose streaming
applications onto a multi-GPU system. This framework
spans the entire core and memory hierarchy exposed by
the multi-GPU system. Several key features in our
framework ensure the scalability required by complex
streaming applications. First, we propose an efficient
stream graph partitioning algorithm that partitions the
complex application to achieve the best performance
under a given shared memory constraint. Next, the
resulting partitions are mapped to multiple GPUs using
an efficient architecture-driven strategy. The mapping
balances the workload while considering the
communication overhead. Finally, a highly effective
pipeline execution is employed for the execution of the
partitions on the multi-GPU system. The framework has
been implemented as a back-end of the StreamIt
programming language compiler. Our comprehensive
experiments show its scalability and significant
performance speedup compared with a previous
state-of-the-art solution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sim:2012:PAF,
author = "Jaewoong Sim and Aniruddha Dasgupta and Hyesoon Kim
and Richard Vuduc",
title = "A performance analysis framework for identifying
potential benefits in {GPGPU} applications",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "11--22",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145819",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Tuning code for GPGPU and other emerging many-core
platforms is a challenge because few models or tools
can precisely pinpoint the root cause of performance
bottlenecks. In this paper, we present a performance
analysis framework that can help shed light on such
bottlenecks for GPGPU applications. Although a handful
of GPGPU profiling tools exist, most of the traditional
tools, unfortunately, simply provide programmers with a
variety of measurements and metrics obtained by running
applications, and it is often difficult to map these
metrics to understand the root causes of slowdowns,
much less to decide which optimization step to take
next to alleviate the bottleneck. In our approach, we first
develop an analytical performance model that can
precisely predict performance and aims to provide
programmer-interpretable metrics. Then, we apply static
and dynamic profiling to instantiate our performance
model for a particular input code and show how the
model can predict the potential performance benefits.
We demonstrate our framework on a suite of
micro-benchmarks as well as a variety of computations
extracted from real codes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Baghsorkhi:2012:EPE,
author = "Sara S. Baghsorkhi and Isaac Gelado and Matthieu
Delahaye and Wen-mei W. Hwu",
title = "Efficient performance evaluation of memory hierarchy
for highly multithreaded graphics processors",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "23--34",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145820",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "With the emergence of highly multithreaded
architectures, performance monitoring techniques face
new challenges in efficiently locating sources of
performance discrepancies in the program source code.
For example, the state-of-the-art performance counters
in highly multithreaded graphics processing units
(GPUs) report only the overall occurrences of
microarchitecture events at the end of program
execution. Furthermore, even if supported, any
fine-grained sampling of performance counters will
distort the actual program behavior and will make the
sampled values inaccurate. On the other hand, it is
difficult to achieve high resolution performance
information at low sampling rates in the presence of
thousands of concurrently running threads. In this
paper, we present a novel software-based approach for
monitoring the memory hierarchy performance in highly
multithreaded general-purpose graphics processors. The
proposed analysis is based on memory traces collected
for snapshots of an application execution. A
trace-based memory hierarchy model with a Monte Carlo
experimental methodology generates statistical bounds
of performance measures without being concerned about
the exact inter-thread ordering of individual events
but rather studying the behavior of the overall system.
The statistical approach overcomes the classical
problem of disturbed execution timing due to
fine-grained instrumentation. The approach scales well
as we deploy an efficient parallel trace collection
technique to reduce the trace generation overhead and a
simple memory hierarchy model to reduce the simulation
time. The proposed scheme also keeps track of
individual memory operations in the source code and can
quantify their efficiency with respect to the memory
system. A cross-validation of our results shows close
agreement with the values read from the hardware
performance counters on an NVIDIA Tesla C2050 GPU.
Based on the high resolution profile data produced by
our model we optimized memory accesses in the sparse
matrix vector multiply kernel and achieved speedups
ranging from 2.4 to 14.8 depending on the
characteristics of the input matrices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ballard:2012:CAS,
author = "Grey Ballard and James Demmel and Nicholas Knight",
title = "Communication avoiding successive band reduction",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "35--44",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145822",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The running time of an algorithm depends on both
arithmetic and communication (i.e., data movement)
costs, and the relative costs of communication are
growing over time. In this work, we present both
theoretical and practical results for tridiagonalizing
a symmetric band matrix: we present an algorithm that
asymptotically reduces communication, and we show that
it indeed performs well in practice. The
tridiagonalization of a symmetric band matrix is a key
kernel in solving the symmetric eigenvalue problem for
both full and band matrices. In order to preserve
sparsity, tridiagonalization routines use
annihilate-and-chase procedures that previously have
suffered from poor data locality. We improve data
locality by reorganizing the computation,
asymptotically reducing communication costs compared to
existing algorithms. Our sequential implementation
demonstrates that avoiding communication improves
runtime even at the expense of extra arithmetic: we
observe a 2x speedup over Intel MKL while doing 43\%
more floating point operations. Our parallel
implementation targets shared-memory multicore
platforms. It uses pipelined parallelism and a static
scheduler while retaining the locality properties of
the sequential algorithm. Due to lightweight
synchronization and effective data reuse, we see 9.5x
scaling over our serial code and up to 6x speedup over
the PLASMA library, comparing parallel performance on a
ten-core processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sack:2012:FTA,
author = "Paul Sack and William Gropp",
title = "Faster topology-aware collective algorithms through
non-minimal communication",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "45--54",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145823",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Known algorithms for two important collective
communication operations, allgather and reduce-scatter,
are minimal-communication algorithms; no process sends
or receives more than the minimum amount of data. This,
combined with the data-ordering semantics of the
operations, limits the flexibility and performance of
these algorithms. Our novel non-minimal, topology-aware
algorithms deliver far better performance with the
addition of a very small amount of redundant
communication. We develop novel algorithms for Clos
networks and single or multi-ported torus networks.
Tests on a 32k-node BlueGene/P result in allgather
speedups of up to 6x and reduce-scatter speedups of
over 11x compared to the native IBM algorithm.
Broadcast, reduce, and allreduce can be composed of
allgather or reduce-scatter and other collective
operations; our techniques also improve the performance
of these algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kim:2012:ESC,
author = "Seonggun Kim and Hwansoo Han",
title = "Efficient {SIMD} code generation for irregular
kernels",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "55--64",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145824",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Array indirection causes several challenges for
compilers to utilize single instruction, multiple data
(SIMD) instructions. Disjoint memory references,
arbitrarily misaligned memory references, and
dependence cycles in loops are main challenges to
handle for SIMD compilers. Due to those challenges,
existing SIMD compilers have excluded loops with array
indirection from their candidate loops for SIMD
vectorization. However, addressing those challenges is
inevitable, since many important compute-intensive
applications extensively use array indirection to
reduce memory and computation requirements. In this
work, we propose a method to generate efficient SIMD
code for loops containing indirected memory references.
We extract both inter- and intra-iteration parallelism,
taking data reorganization overhead into consideration.
We also optimally place data reorganization code in
order to amortize the reorganization overhead through
the performance gain of SIMD vectorization. Experiments
on four array indirection kernels, which are extracted
from real-world scientific applications, show that our
proposed method effectively generates SIMD code for
irregular kernels with array indirection. Compared to
the existing SIMD vectorization methods, our proposed
method significantly improves the performance of
irregular kernels by 91\%, on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Leissa:2012:ECL,
author = "Roland Lei{\ss}a and Sebastian Hack and Ingo Wald",
title = "Extending a {C}-like language for portable {SIMD}
programming",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "65--74",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145825",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "SIMD instructions are common in CPUs for years now.
Using these instructions effectively requires not only
vectorization of code, but also modifications to the
data layout. However, automatic vectorization
techniques are often not powerful enough and suffer
from restricted scope of applicability; hence,
programmers often vectorize their programs manually by
using intrinsics: compiler-known functions that
directly expand to machine instructions. They
significantly decrease programmer productivity by
enforcing a very error-prone and hard-to-read
assembly-like programming style. Furthermore,
intrinsics are not portable because they are tied to a
specific instruction set. In this paper, we show how a
C-like language can be extended to allow for portable
and efficient SIMD programming. Our extension puts the
programmer in total control over where and how
control-flow vectorization is triggered. We present a
type system and a formal semantics of our extension and
prove the soundness of the type system. Using our
prototype implementation IVL that targets Intel's MIC
architecture and SSE instruction set, we show that the
generated code is roughly on par with handwritten
intrinsic code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
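
For contrast with the portable extension the abstract proposes, this is
the intrinsic style it criticizes: a minimal SSE sketch in C, tied to
one 128-bit instruction set, with alignment and trip-count contracts
the compiler cannot check for you.

    #include <xmmintrin.h>      /* SSE intrinsics: x86-specific */

    /* a[i] += b[i] for n floats; assumes n % 4 == 0 and both pointers
       16-byte aligned -- the fragile contract intrinsics impose. */
    void add_sse(float *a, const float *b, int n)
    {
        for (int i = 0; i < n; i += 4) {
            __m128 va = _mm_load_ps(a + i);
            __m128 vb = _mm_load_ps(b + i);
            _mm_store_ps(a + i, _mm_add_ps(va, vb));
        }
    }

Retargeting this loop to AVX or to a non-x86 ISA means rewriting every
line; a vector-typed source language can instead re-emit it per target.
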
@Article{Kwon:2012:HAO,
author = "Okwan Kwon and Fahed Jubair and Rudolf Eigenmann and
Samuel Midkiff",
title = "A hybrid approach of {OpenMP} for clusters",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "75--84",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145827",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "We present the first fully automated compiler-runtime
system that successfully translates and executes OpenMP
shared-address-space programs on laboratory-size
clusters, for the complete set of regular, repetitive
applications in the NAS Parallel Benchmarks. We
introduce a hybrid compiler-runtime translation scheme.
Compared to previous work, this scheme features a new
runtime data flow analysis and new compiler techniques
for improving data affinity and reducing communication
costs. We present and discuss the performance of our
translated programs, and compare them with the
performance of the MPI, HPF and UPC versions of the
benchmarks. The results show that our translated
programs achieve 75\% of the performance of the
hand-coded MPI programs, on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{hunEom:2012:DDP,
author = "Yong hun Eom and Stephen Yang and James C. Jenista and
Brian Demsky",
title = "{DOJ}: dynamically parallelizing object-oriented
programs",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "85--96",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145828",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "We present Dynamic Out-of-Order Java (DOJ), a dynamic
parallelization approach. In DOJ, a developer annotates
code blocks as tasks to decouple these blocks from the
parent execution thread. The DOJ compiler then analyzes
the code to generate heap examiners that ensure the
parallel execution preserves the behavior of the
original sequential program. Heap examiners dynamically
extract heap dependences between code blocks and
determine when it is safe to execute a code block. We
have implemented DOJ and evaluated it on twelve
benchmarks. We achieved an average compilation speedup
of 31.15 times over OoOJava and an average execution
speedup of 12.73 times over sequential versions of the
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bonetta:2012:SLH,
author = "Daniele Bonetta and Achille Peternier and Cesare
Pautasso and Walter Binder",
title = "{S}: a scripting language for high-performance
{RESTful} {Web} services",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "97--106",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145829",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "There is an urgent need for novel programming
abstractions to leverage the parallelism in modern
multicore machines. We introduce S, a new
domain-specific language targeting the server-side
scripting of high-performance RESTful Web services. S
promotes an innovative programming model based on
explicit (control-flow) and implicit (process-level)
parallelism control, allowing the service developer to
specify which portions of the control-flow should be
executed in parallel. For each service, the choice of
the best level of parallelism is left to the runtime
system. We assess performance and scalability by
implementing two non-trivial composite Web services in
S. Experiments show that S-based Web services can
handle thousands of concurrent client requests on a
modern multicore machine.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mendez-Lojo:2012:GII,
author = "Mario Mendez-Lojo and Martin Burtscher and Keshav
Pingali",
title = "A {GPU} implementation of inclusion-based points-to
analysis",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "107--116",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145831",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Graphics Processing Units (GPUs) have emerged as
powerful accelerators for many regular algorithms that
operate on dense arrays and matrices. In contrast, we
know relatively little about using GPUs to accelerate
highly irregular algorithms that operate on
pointer-based data structures such as graphs. For the
most part, research has focused on GPU implementations
of graph analysis algorithms that do not modify the
structure of the graph, such as algorithms for
breadth-first search and strongly-connected components.
In this paper, we describe a high-performance GPU
implementation of an important graph algorithm used in
compilers such as gcc and LLVM: Andersen-style
inclusion-based points-to analysis. This algorithm is
challenging to parallelize effectively on GPUs because
it makes extensive modifications to the structure of
the underlying graph and performs relatively little
computation. In spite of this, our program, when
executed on a GPU with 14 streaming multiprocessors,
achieves an average speedup of 7x compared to a sequential CPU
implementation and outperforms a parallel
implementation of the same algorithm running on 16 CPU
cores. Our implementation provides general insights
into how to produce high-performance GPU
implementations of graph algorithms, and it highlights
key differences between optimizing parallel programs
for multicore CPUs and for GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
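
Underneath the graph rewriting the abstract mentions sits a simple
fixed point: points-to sets flow along inclusion ("copy") edges until
nothing changes. A tiny worklist sketch in C, with sets as 64-bit masks
purely for illustration (production analyses use sparse bitmaps, and
__builtin_ctzll assumes GCC/Clang):

    #include <stdint.h>

    #define MAXV 64

    /* pts[v]: v's points-to set as a bitmask; succ[v]: the variables w
       with the inclusion constraint pts[v] subset-of pts[w]. Propagate
       to a fixed point -- the loop a GPU version runs in parallel. */
    void propagate(int nvars, uint64_t pts[], const uint64_t succ[])
    {
        int work[MAXV], queued[MAXV] = {0}, top = 0;
        for (int v = 0; v < nvars; v++) {
            work[top++] = v;
            queued[v] = 1;
        }
        while (top > 0) {
            int v = work[--top];
            queued[v] = 0;
            for (uint64_t s = succ[v]; s; s &= s - 1) {
                int w = __builtin_ctzll(s);
                uint64_t grown = pts[w] | pts[v];
                if (grown != pts[w]) {      /* w grew: reconsider it */
                    pts[w] = grown;
                    if (!queued[w]) { work[top++] = w; queued[w] = 1; }
                }
            }
        }
    }
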
@Article{Merrill:2012:SGG,
author = "Duane Merrill and Michael Garland and Andrew
Grimshaw",
title = "Scalable {GPU} graph traversal",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "117--128",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145832",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Breadth-first search (BFS) is a core primitive for
graph traversal and a basis for many higher-level graph
analysis algorithms. It is also representative of a
class of parallel computations whose memory accesses
and work distribution are both irregular and
data-dependent. Recent work has demonstrated the
plausibility of GPU sparse graph traversal, but has
tended to focus on asymptotically inefficient
algorithms that perform poorly on graphs with
non-trivial diameter. We present a BFS parallelization
focused on fine-grained task management constructed
from efficient prefix sum that achieves an
asymptotically optimal O(|V|+|E|) work complexity.
Our implementation delivers excellent performance on
diverse graphs, achieving traversal rates in excess of
3.3 billion and 8.3 billion traversed edges per second
using single and quad-GPU configurations, respectively.
This level of performance is several times faster than
state-of-the-art implementations on both CPU and GPU
platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
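
The O(|V|+|E|) bound cited above is the cost of level-synchronous
frontier expansion, in which each vertex enters a frontier once and
each edge is scanned once. A sequential CSR sketch in C for reference;
the paper's contribution is doing the frontier compaction itself with
parallel prefix sums on the GPU.

    #include <stdlib.h>

    /* BFS over a CSR graph: neighbors of v are col[row[v]..row[v+1]).
       dist[] receives BFS levels, -1 for unreachable vertices. */
    void bfs(int n, const int *row, const int *col, int src, int *dist)
    {
        int *frontier = malloc((size_t)n * sizeof *frontier);
        int *next = malloc((size_t)n * sizeof *next);
        for (int v = 0; v < n; v++)
            dist[v] = -1;
        dist[src] = 0;
        frontier[0] = src;
        for (int fsize = 1, level = 1; fsize > 0; level++) {
            int nsize = 0;
            for (int i = 0; i < fsize; i++) {
                int u = frontier[i];
                for (int e = row[u]; e < row[u + 1]; e++)
                    if (dist[col[e]] < 0) {     /* first visit */
                        dist[col[e]] = level;
                        next[nsize++] = col[e];
                    }
            }
            int *tmp = frontier; frontier = next; next = tmp;
            fsize = nsize;
        }
        free(frontier);
        free(next);
    }
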
@Article{Zu:2012:GBN,
author = "Yuan Zu and Ming Yang and Zhonghu Xu and Lin Wang and
Xin Tian and Kunyang Peng and Qunfeng Dong",
title = "{GPU}-based {NFA} implementation for memory efficient
high speed regular expression matching",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "129--140",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145833",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Regular expression pattern matching is the foundation
and core engine of many network functions, such as
network intrusion detection, worm detection, traffic
analysis, web applications and so on. DFA-based
solutions suffer exponentially exploding state space
and cannot be remedied without sacrificing matching
speed. Given this scalability problem of DFA-based
methods, there has been increasing interest in
NFA-based methods for memory efficient regular
expression matching. Achieving high matching speed
using an NFA requires potentially massive parallel
processing, and hence represents an ideal programming
task for the Graphics Processing Unit (GPU). Based on in-depth
understanding of NFA properties as well as GPU
architecture, we propose effective methods for fitting
NFAs into GPU architecture through proper data
structure and parallel programming design, so that
GPU's parallel processing power can be better utilized
to achieve high speed regular expression matching.
Experiment results demonstrate that, compared with the
existing GPU-based NFA implementation method [9], our
proposed methods can boost matching speed by 29 to 46
times, consistently yielding above 10Gbps matching
speed on NVIDIA GTX-460 GPU. Meanwhile, our design only
needs a small amount of memory space, growing
exponentially more slowly than DFA size. These results
make our design an effective solution for memory
efficient high speed regular expression matching, and
clearly demonstrate the power and potential of GPU as a
platform for memory efficient high speed regular
expression matching.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
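
The reason NFAs trade speed for memory, as the abstract explains, is
that one input byte can keep many states active at once. A compact
CPU-side encoding, sketched in C for at most 64 states, keeps the
active set in one machine word; the paper's design lays such state out
so that many GPU threads advance states and packets in parallel. The
sketch assumes an epsilon-free NFA with precompiled transitions, and
__builtin_ctzll assumes GCC/Clang.

    #include <stddef.h>
    #include <stdint.h>

    /* next[s][c]: bitmask of states reachable from state s on byte c.
       One step unions the successor masks of all active states, so the
       memory cost stays linear in the NFA, unlike DFA subset blow-up. */
    int nfa_match(const uint64_t next[64][256], uint64_t start,
                  uint64_t accept, const unsigned char *in, size_t len)
    {
        uint64_t active = start;
        for (size_t i = 0; i < len && active; i++) {
            uint64_t succ = 0;
            for (uint64_t set = active; set; set &= set - 1)
                succ |= next[__builtin_ctzll(set)][in[i]];
            active = succ;
            if (active & accept)
                return 1;               /* match ends at byte i */
        }
        return (active & accept) != 0;
    }
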
@Article{Kogan:2012:MCF,
author = "Alex Kogan and Erez Petrank",
title = "A methodology for creating fast wait-free data
structures",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "141--150",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145835",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Lock-freedom is a progress guarantee that ensures
overall program progress. Wait-freedom is a stronger
progress guarantee that ensures the progress of each
thread in the program. While many practical lock-free
algorithms exist, wait-free algorithms are typically
inefficient and hardly used in practice. In this paper,
we propose a methodology called fast-path-slow-path for
creating efficient wait-free algorithms. The idea is to
execute the efficient lock-free version most of the
time and revert to the wait-free version only when
things go wrong. The generality and effectiveness of
this methodology are demonstrated by two examples. In
this paper, we apply this idea to a recent construction
of a wait-free queue, bringing the wait-free
implementation to perform in practice as efficiently as
the lock-free implementation. In another work, the
fast-path-slow-path methodology has been used for
(dramatically) improving the performance of a wait-free
linked-list.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
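
A toy rendering in C11 of the fast-path-slow-path shape described
above: bounded retries of a lock-free CAS path, then a fall-back
guaranteed to finish. In the paper the slow path completes through
helping via announced operation descriptors; here a hardware
fetch-and-add merely stands in for it, to show the control structure.

    #include <stdatomic.h>

    static _Atomic long counter;

    enum { MAX_FAST_TRIES = 8 };

    /* One attempt of the lock-free fast path: a single CAS. */
    static int try_cas_increment(void)
    {
        long old = atomic_load(&counter);
        return atomic_compare_exchange_strong(&counter, &old, old + 1);
    }

    void increment(void)
    {
        /* Fast path: the ordinary lock-free algorithm, bounded tries;
           under low contention this is all that ever executes. */
        for (int i = 0; i < MAX_FAST_TRIES; i++)
            if (try_cas_increment())
                return;
        /* Slow path: completes in a bounded number of this thread's
           own steps, giving the per-thread (wait-free) guarantee. */
        atomic_fetch_add(&counter, 1);
    }
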
@Article{Prokopec:2012:CTE,
author = "Aleksandar Prokopec and Nathan Grasso Bronson and Phil
Bagwell and Martin Odersky",
title = "Concurrent tries with efficient non-blocking
snapshots",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "151--160",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145836",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "We describe a non-blocking concurrent hash trie based
on shared-memory single-word compare-and-swap
instructions. The hash trie supports standard mutable
lock-free operations such as insertion, removal, lookup
and their conditional variants. To ensure
space-efficiency, removal operations compress the trie
when necessary. We show how to implement an efficient
lock-free snapshot operation for concurrent hash tries.
The snapshot operation uses a single-word
compare-and-swap and avoids copying the data structure
eagerly. Snapshots are used to implement consistent
iterators and a linearizable size retrieval. We compare
concurrent hash trie performance with other concurrent
data structures and evaluate the performance of the
snapshot operation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Crain:2012:SFB,
author = "Tyler Crain and Vincent Gramoli and Michel Raynal",
title = "A speculation-friendly binary search tree",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "161--170",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145837",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "We introduce the first binary search tree algorithm
designed for speculative executions. Prior to this
work, tree structures were mainly designed for their
pessimistic (non-speculative) accesses to have a
bounded complexity. Researchers tried to evaluate
transactional memory using such tree structures whose
prominent example is the red-black tree library
developed by Oracle Labs that is part of multiple
benchmark distributions. Although well-engineered, such
structures remain badly suited for speculative
accesses, whose step complexity might rise
dramatically with contention. We show that our
speculation-friendly tree outperforms the existing
transaction-based version of the AVL and the red-black
trees. Its key novelty stems from the decoupling of
update operations: they are split into one transaction
that modifies the abstraction state and multiple ones
that restructure its tree implementation in the
background. In particular, the speculation-friendly
tree is shown to be correct and reusable, and it speeds
up a transaction-based travel reservation application
by up to 3.5x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chen:2012:PUA,
author = "Yifeng Chen and Xiang Cui and Hong Mei",
title = "{PARRAY}: a unifying array representation for
heterogeneous parallelism",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "171--180",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145838",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "This paper introduces a programming interface called
PARRAY (or Parallelizing ARRAYs) that supports
system-level succinct programming for heterogeneous
parallel systems like GPU clusters. The current
practice of software development requires combining
several low-level libraries like Pthread, OpenMP, CUDA
and MPI. Achieving productivity and portability is hard
with different numbers and models of GPUs. PARRAY
extends mainstream C programming with novel array types
of distinct features: (1) the dimensions of an array
type are nested in a tree, conceptually reflecting the
memory hierarchy; (2) the definition of an array type
may contain references to other array types, allowing
sophisticated array types to be created for
parallelization; (3) threads also form arrays that
allow programming in a
Single-Program-Multiple-Codeblock (SPMC) style to unify
various sophisticated communication patterns. This
leads to shorter, more portable and maintainable
parallel codes, while the programmer still has control
over performance-related features necessary for deep
manual optimization. Although the source-to-source code
generator only faithfully generates low-level library
calls according to the type information, higher-level
programming and automatic performance optimization are
still possible through building libraries of
sub-programs on top of PARRAY. The case study on
cluster FFT illustrates a simple 30-line code that
outperforms Intel Cluster MKL by 2x on the Tianhe-1A
system with 7168 Fermi GPUs and 14336 CPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Blelloch:2012:IDP,
author = "Guy E. Blelloch and Jeremy T. Fineman and Phillip B.
Gibbons and Julian Shun",
title = "Internally deterministic parallel algorithms can be
fast",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "181--192",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145840",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The virtues of deterministic parallelism have been
argued for decades and many forms of deterministic
parallelism have been described and analyzed. Here we
are concerned with one of the strongest forms,
requiring that for any input there is a unique
dependence graph representing a trace of the
computation annotated with every operation and value.
This has been referred to as internal determinism, and
implies a sequential semantics---i.e., considering any
sequential traversal of the dependence graph is
sufficient for analyzing the correctness of the code.
In addition to returning deterministic results,
internal determinism has many advantages including ease
of reasoning about the code, ease of verifying
correctness, ease of debugging, ease of defining
invariants, ease of defining good coverage for testing,
and ease of formally, informally and experimentally
reasoning about performance. On the other hand, one
needs to consider the possible downsides of
determinism, which might include making algorithms (i)
more complicated, unnatural or special purpose and/or
(ii) slower or less scalable. In this paper we study
the effectiveness of this strong form of determinism
through a broad set of benchmark problems. Our main
contribution is to demonstrate that for this wide body
of problems, there exist efficient internally
deterministic algorithms, and moreover that these
algorithms are natural to reason about and not
complicated to code. We leverage an approach to
determinism suggested by Steele (1990), which is to use
nested parallelism with commutative operations. Our
algorithms apply several diverse programming paradigms
that fit within the model including (i) a strict
functional style (no shared state among concurrent
operations), (ii) an approach we refer to as
deterministic reservations, and (iii) the use of
commutative, linearizable operations on data
structures. We describe algorithms for the benchmark
problems that use these deterministic approaches and
present performance results on a 32-core machine.
Perhaps surprisingly, for all problems, our internally
deterministic algorithms achieve good speedup and good
performance even relative to prior nondeterministic
solutions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Leiserson:2012:DPR,
author = "Charles E. Leiserson and Tao B. Schardl and Jim
Sukha",
title = "Deterministic parallel random-number generation for
dynamic-multithreading platforms",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "193--204",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145841",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Existing concurrency platforms for dynamic
multithreading do not provide repeatable parallel
random-number generators. This paper proposes that a
mechanism called pedigrees be built into the runtime
system to enable efficient deterministic parallel
random-number generation. Experiments with the
open-source MIT Cilk runtime system show that the
overhead for maintaining pedigrees is negligible.
Specifically, on a suite of 10 benchmarks, the relative
overhead of Cilk with pedigrees to the original Cilk
has a geometric mean of less than 1\%. We persuaded
Intel to modify its commercial C/C++ compiler, which
provides the Cilk Plus concurrency platform, to include
pedigrees, and we built a library implementation of a
deterministic parallel random-number generator called
DotMix that compresses the pedigree and then
``RC6-mixes'' the result. The statistical quality of
DotMix is comparable to that of the popular Mersenne
twister, though DotMix is somewhat slower than a
nondeterministic parallel version of this efficient and
high-quality serial random-number generator. The cost of calling
DotMix depends on the ``spawn depth'' of the
invocation. For a naive Fibonacci calculation with n=40
that calls DotMix in every node of the computation,
this ``price of determinism'' is a factor of 2.65 in
running time, but for more realistic applications with
less intense use of random numbers --- such as a
maximal-independent-set algorithm, a practical
samplesort program, and a Monte Carlo discrete-hedging
application from QuantLib --- the observed ``price''
was less than 5\%. Moreover, even if overheads were
several times greater, applications using DotMix should
be amply fast for debugging purposes, which is a major
reason for desiring repeatability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
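
The pedigree idea can be pictured as hashing the strand's rank path
from the root of the spawn tree: the same pedigree yields the same
value on every run, regardless of how the scheduler interleaves
strands. A C sketch, with a SplitMix64-style finalizer standing in for
DotMix's RC6-based mixing (the function names are ours, not the
paper's).

    #include <stdint.h>

    /* Generic 64-bit mixer (SplitMix64 finalizer). */
    static uint64_t mix64(uint64_t x)
    {
        x += 0x9e3779b97f4a7c15ULL;
        x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
        x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
        return x ^ (x >> 31);
    }

    /* pedigree[0..len) holds the spawn ranks on the path from the root
       to the current strand; an order-sensitive compression of it gives
       a deterministic per-strand random value. */
    uint64_t pedigree_rand(const uint64_t *pedigree, int len, uint64_t seed)
    {
        uint64_t h = seed;
        for (int i = 0; i < len; i++)
            h = mix64(h ^ pedigree[i]);
        return h;
    }
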
@Article{Nobari:2012:SPM,
author = "Sadegh Nobari and Thanh-Tung Cao and Panagiotis Karras
and St{\'e}phane Bressan",
title = "Scalable parallel minimum spanning forest
computation",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "205--214",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145842",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The proliferation of data in graph form calls for the
development of scalable graph algorithms that exploit
parallel processing environments. One such problem is
the computation of a graph's minimum spanning forest
(MSF). Past research has proposed several parallel
algorithms for this problem, yet none of them scales to
large, high-density graphs. In this paper we propose a
novel, scalable, parallel MSF algorithm for undirected
weighted graphs. Our algorithm leverages Prim's
algorithm in a parallel fashion, concurrently expanding
several subsets of the computed MSF. Our effort focuses
on minimizing the communication among different
processors without constraining the local growth of a
processor's computed subtree. In effect, we achieve a
scalability that previous approaches lacked. We
implement our algorithm in CUDA, running on a GPU and
study its performance using real and synthetic, sparse
as well as dense, structured and unstructured graph
data. Our experimental study demonstrates that our
algorithm outperforms the previous state-of-the-art
GPU-based MSF algorithm, while being several orders of
magnitude faster than sequential CPU-based
algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
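
For reference against the parallel design above, the sequential Prim's
algorithm it builds on, in the simplest dense O(|V|^2) form; the paper
instead grows several partial trees concurrently and merges them into a
forest. The sketch caps n at 256 only to stay allocation-free.

    #include <limits.h>

    /* MST weight of an n x n adjacency matrix w (row-major, INT_MAX
       meaning "no edge"); best[v] is the cheapest edge joining v to
       the tree grown so far. Assumes n <= 256. */
    long prim_mst(int n, const int *w)
    {
        int in_tree[256] = {0};
        int best[256];
        for (int v = 0; v < n; v++)
            best[v] = INT_MAX;
        best[0] = 0;
        long total = 0;
        for (int step = 0; step < n; step++) {
            int u = -1;
            for (int v = 0; v < n; v++)     /* cheapest fringe vertex */
                if (!in_tree[v] && (u < 0 || best[v] < best[u]))
                    u = v;
            if (best[u] == INT_MAX)
                break;          /* graph disconnected: stop at a forest */
            in_tree[u] = 1;
            total += best[u];
            for (int v = 0; v < n; v++)     /* relax edges leaving u */
                if (!in_tree[v] && w[u * n + v] < best[v])
                    best[v] = w[u * n + v];
        }
        return total;
    }
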
@Article{Li:2012:GCV,
author = "Guodong Li and Peng Li and Geof Sawaya and Ganesh
Gopalakrishnan and Indradeep Ghosh and Sreeranga P.
Rajan",
title = "{GKLEE}: concolic verification and test generation for
{GPUs}",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "215--224",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145844",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Programs written for GPUs often contain correctness
errors such as races, deadlocks, or may compute the
wrong result. Existing debugging tools often miss these
errors because of their limited input-space and
execution-space exploration. Existing tools based on
conservative static analysis or conservative modeling
of SIMD concurrency generate false alarms resulting in
wasted bug-hunting. They also often do not target
performance bugs (non-coalesced memory accesses, memory
bank conflicts, and divergent warps). We provide a new
framework called GKLEE that can analyze C++ GPU
programs, locating the aforesaid correctness and
performance bugs. For these programs, GKLEE can also
automatically generate tests that provide high
coverage. These tests serve as concrete witnesses for
every reported bug. They can also be used for
downstream debugging, for example to test the kernel on
the actual hardware. We describe the architecture of
GKLEE, its symbolic virtual machine model, and describe
previously unknown bugs and performance issues that it
detected on commercial SDK kernels. We describe GKLEE's
test-case reduction heuristics, and the resulting
scalability improvement for a given coverage target.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Du:2012:ABF,
author = "Peng Du and Aurelien Bouteiller and George Bosilca and
Thomas Herault and Jack Dongarra",
title = "Algorithm-based fault tolerance for dense matrix
factorizations",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "225--234",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145845",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Dense matrix factorizations, such as LU, Cholesky and
QR, are widely used for scientific applications that
require solving systems of linear equations,
eigenvalues and linear least squares problems. Such
computations are normally carried out on
supercomputers, whose ever-growing scale induces a fast
decline of the Mean Time To Failure (MTTF). This paper
proposes a new hybrid approach, based on
Algorithm-Based Fault Tolerance (ABFT), to help matrix
factorization algorithms survive fail-stop failures.
We consider extreme conditions, such as the absence of
any reliable component and the possibility of losing
both data and checksum from a single failure. We
present a generic solution for protecting the right
factor, where the updates are applied, for all of the
above-mentioned factorizations. For the left factor,
where the panel has been applied, we propose a scalable
checkpointing algorithm. This algorithm features a high
degree of checkpointing parallelism and cooperatively
utilizes the checksum storage left over from the right
factor protection. The fault-tolerant algorithms
derived from this hybrid solution are applicable to a
wide range of dense matrix factorizations, with minor
modifications. Theoretical analysis shows that the
fault tolerance overhead sharply decreases with the
scaling in the number of computing units and the
problem size. Experimental results of LU and QR
factorization on the Kraken (Cray XT5) supercomputer
validate the theoretical evaluation and confirm
negligible overhead, with- and without-errors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Buhler:2012:EDA,
author = "Jeremy D. Buhler and Kunal Agrawal and Peng Li and
Roger D. Chamberlain",
title = "Efficient deadlock avoidance for streaming computation
with filtering",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "235--246",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145846",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Parallel streaming computations have been studied
extensively, and many languages, libraries, and systems
have been designed to support this model of
computation. In particular, we consider acyclic
streaming computations in which individual nodes can
choose to filter, or discard, some of their inputs in a
data-dependent manner. In these applications, if the
channels between nodes have finite buffers, the
computation can deadlock. One method of deadlock
avoidance is to augment the data streams between nodes
with occasional dummy messages; however, for general
DAG topologies, no polynomial time algorithm is known
to compute the intervals at which dummy messages must
be sent to avoid deadlock. In this paper, we show that
deadlock avoidance for streaming computations with
filtering can be performed efficiently for a large
class of DAG topologies. We first present a new method
where each dummy message is tagged with a destination,
so as to reduce the number of dummy messages sent over
the network. We then give efficient algorithms for
dummy interval computation in series-parallel DAGs. We
                 finally generalize our results to a larger graph
                 family, which we call the CS$^4$ DAGs, in which every
                 undirected Cycle is Single-Source and Single-Sink. Our
                 results show that, for a large set of
application topologies that are both intuitively useful
and formalizable, the streaming model with filtering
can be implemented safely with reasonable overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dice:2012:LCG,
author = "David Dice and Virendra J. Marathe and Nir Shavit",
title = "Lock cohorting: a general technique for designing
{NUMA} locks",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "247--256",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145848",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Multicore machines are quickly shifting to NUMA and
CC-NUMA architectures, making scalable NUMA-aware
locking algorithms, ones that take into account the
machines' non-uniform memory and caching hierarchy,
ever more important. This paper presents lock
cohorting, a general new technique for designing
NUMA-aware locks that is as simple as it is powerful.
                 Lock cohorting allows one to transform any spin-lock
                 algorithm, with minimal non-intrusive changes, into a
                 scalable NUMA-aware spin-lock. Our new cohorting
technique allows us to easily create NUMA-aware
versions of the TATAS-Backoff, CLH, MCS, and ticket
locks, to name a few. Moreover, it allows us to derive
a CLH-based cohort abortable lock, the first NUMA-aware
queue lock to support abortability. We empirically
compared the performance of cohort locks with prior
NUMA-aware and classic NUMA-oblivious locks on a
                 synthetic micro-benchmark, a real-world key-value store
                 application (memcached), as well as the libc memory
allocator. Our results demonstrate that cohort locks
perform as well or better than known locks when the
load is low and significantly out-perform them as the
load increases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Fatourou:2012:RCS,
author = "Panagiota Fatourou and Nikolaos D. Kallimanis",
title = "Revisiting the combining synchronization technique",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "257--266",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145849",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Fine-grain thread synchronization has been proved, in
several cases, to be outperformed by efficient
implementations of the combining technique where a
single thread, called the combiner, holding a
coarse-grain lock, serves, in addition to its own
synchronization request, active requests announced by
other threads while they are waiting by performing some
form of spinning. Efficient implementations of this
technique significantly reduce the cost of
synchronization, so in many cases they exhibit much
better performance than the most efficient finely
synchronized algorithms. In this paper, we revisit the
combining technique with the goal to discover where its
real performance power resides and whether or how
ensuring some desired properties (e.g., fairness in
serving requests) would impact performance. We do so by
presenting two new implementations of this technique;
the first (CC-Synch) addresses systems that support
coherent caches, whereas the second (DSM-Synch) works
better in cacheless NUMA machines. In comparison to
previous such implementations, the new implementations
(1) provide bounds on the number of remote memory
references (RMRs) that they perform, (2) support a
stronger notion of fairness, and (3) use simpler and
less basic primitives than previous approaches. In all
our experiments, the new implementations outperform by
far all previous state-of-the-art combining-based and
fine-grain synchronization algorithms. Our experimental
analysis sheds light to the questions we aimed to
answer. Several modern multi-core systems organize the
cores into clusters and provide fast communication
within the same cluster and much slower communication
across clusters. We present an hierarchical version of
CC-Synch, called H-Synch which exploits the
hierarchical communication nature of such systems to
achieve better performance. Experiments show that
H-Synch significantly outper forms previous
state-of-the-art hierarchical approaches. We provide
new implementations of common shared data structures
(like stacks and queues) based on CC-Synch, DSM-Synch
and H-Synch. Our experiments show that these
implementations outperform by far all previous
(fine-grain or combined-based) implementations of
shared stacks and queues.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tardieu:2012:WSS,
author = "Olivier Tardieu and Haichuan Wang and Haibo Lin",
title = "A work-stealing scheduler for {X10}'s task parallelism
with suspension",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "267--276",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145850",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The X10 programming language is intended to ease the
programming of scalable concurrent and distributed
applications. X10 augments a familiar imperative
object-oriented programming model with constructs to
support light-weight asynchronous tasks as well as
execution across multiple address spaces. A crucial
aspect of X10's runtime system is the scheduling of
concurrent tasks. Work-stealing schedulers have been
shown to efficiently load balance fine-grain
                 divide-and-conquer task-parallel programs on SMPs and
multicores. But X10 is not limited to shared-memory
fork-join parallelism. X10 permits tasks to suspend and
synchronize by means of conditional atomic blocks and
remote task invocations. In this paper, we demonstrate
that work-stealing scheduling principles are applicable
to a rich programming language such as X10, achieving
performance at scale without compromising expressivity,
ease of use, or portability. We design and implement a
portable work-stealing execution engine for X10. While
this engine is biased toward the efficient execution of
fork-join parallelism in shared memory, it handles the
full X10 language, especially conditional atomic blocks
and distribution. We show that this engine improves the
run time of a series of benchmark programs by several
orders of magnitude when used in combination with the
C++ backend compiler and runtime for X10. It achieves
                 scaling comparable to state-of-the-art work-stealing
scheduler implementations---the Cilk++ compiler and the
Java fork/join framework---despite the dramatic
increase in generality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Baskaran:2012:ACO,
author = "Muthu Manikandan Baskaran and Nicolas Vasilache and
Benoit Meister and Richard Lethin",
title = "Automatic communication optimizations through memory
reuse strategies",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "277--278",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145852",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Modern parallel architectures are emerging with
sophisticated hardware consisting of hierarchically
placed parallel processors and memories. The properties
of memories in a system vary wildly, not only
quantitatively (size, latency, bandwidth, number of
banks) but also qualitatively (scratchpad, cache).
Along with the emergence of such architectures comes
the need for effectively utilizing the parallel
processors and properly managing data movement across
memories to improve memory bandwidth and hide data
transfer latency. In this paper, we describe some of
the high-level optimizations that are targeted at the
improvement of memory performance in the R-Stream
compiler, a high-level source-to-source automatic
parallelizing compiler. We direct our focus in this
paper on optimizing communications (data transfers) by
improving memory reuse at various levels of an explicit
                 memory hierarchy. This general concept is well suited
                 to the hardware properties of GPGPUs, the architecture
                 on which we concentrate in this paper. We apply our
                 techniques and obtain performance improvements on
                 various stencil kernels, including an important
                 iterative stencil kernel in seismic processing
                 applications, where the performance is comparable to
                 that of the state-of-the-art implementation of the
                 kernel by a CUDA expert.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liu:2012:FPA,
author = "Gu Liu and Hong An and Wenting Han and Xiaoqiang Li
and Tao Sun and Wei Zhou and Xuechao Wei and Xulong
Tang",
title = "{FlexBFS}: a parallelism-aware implementation of
breadth-first search on {GPU}",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "279--280",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145853",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "In this paper, we present FlexBFS, a parallelism-aware
implementation for breadth-first search on GPU. Our
                 implementation can dynamically adjust the computation
                 resources according to feedback on the available
                 parallelism. We also optimized our program in three
                 ways: (1) a simplified two-level queue management, (2)
                 a combined kernel strategy, and (3) a high-degree
                 vertex specialization approach. Our experimental
                 results show that it achieves 3 to 20 times speedup
                 over the fastest serial version, and outperforms the
                 TBB-based multi-threaded CPU version and the previously
                 most effective GPU version on all types of input
                 graphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Andersch:2012:PPE,
author = "Michael Andersch and Chi Ching Chi and Ben Juurlink",
title = "Programming parallel embedded and consumer
applications in {OpenMP} superscalar",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "281--282",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145854",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "In this paper, we evaluate the performance and
usability of the parallel programming model OpenMP
Superscalar (OmpSs), apply it to 10 different
benchmarks and compare its performance with
corresponding POSIX threads implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhong:2012:OMS,
author = "Jianlong Zhong and Bingsheng He",
title = "An overview of {Medusa}: simplified graph processing
on {GPUs}",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "283--284",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145855",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Graphs are the de facto data structures for many
applications, and efficient graph processing is a must
                 for application performance. GPUs have an order of
magnitude higher computational power and memory
bandwidth compared to CPUs and have been adopted to
accelerate several common graph algorithms. However, it
is difficult to write correct and efficient GPU
programs and even more difficult for graph processing
due to the irregularities of graph structures. To
address those difficulties, we propose a programming
framework named Medusa to simplify graph processing on
GPUs. Medusa offers a small set of APIs, based on which
                 developers can define their application logic by
writing sequential code without awareness of GPU
architectures. The Medusa runtime system automatically
                 executes the developer-defined APIs in parallel on the
GPU, with a series of graph-centric optimizations. This
poster gives an overview of Medusa, and presents some
preliminary results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Alias:2012:ORA,
author = "Christophe Alias and Alain Darte and Alexandru
Plesco",
title = "Optimizing remote accesses for offloaded kernels:
application to high-level synthesis for {FPGA}",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "285--286",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145856",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "In the context of the high-level synthesis (HLS) of
regular kernels offloaded to FPGA and communicating
with an external DDR memory, we show how to
automatically generate adequate communicating processes
for optimizing the transfer of remote data. This
requires a generalized form of communication coalescing
where data can be transferred from the external memory
even when this memory is not fully up-to-date.
Experiments with Altera HLS tools demonstrate that this
                 automation, based on advanced polyhedral code
analysis and code generation techniques, can be used to
efficiently map C kernels to FPGA, by generating,
entirely at C level, all the necessary glue (the
communication processes), which is compiled with the
same HLS tool as for the computation kernel.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tao:2012:UGA,
author = "Jian Tao and Marek Blazewicz and Steven R. Brandt",
title = "Using {GPU}'s to accelerate stencil-based computation
kernels for the development of large scale scientific
applications on heterogeneous systems",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "287--288",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145857",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "We present CaCUDA --- a GPGPU kernel abstraction and a
parallel programming framework for developing highly
efficient large scale scientific applications using
stencil computations on hybrid CPU/GPU architectures.
CaCUDA is built upon the Cactus computational toolkit,
an open source problem solving environment designed for
scientists and engineers. Due to the flexibility and
extensibility of the Cactus toolkit, the addition of a
GPGPU programming framework required no changes to the
Cactus infrastructure, guaranteeing that existing
features and modules will continue to work without
modification. CaCUDA was tested and benchmarked using a
3D CFD code based on a finite difference discretization
of Navier--Stokes equations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Marker:2012:MED,
author = "Bryan Marker and Andy Terrel and Jack Poulson and Don
Batory and Robert van de Geijn",
title = "Mechanizing the expert dense linear algebra
developer",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "289--290",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145858",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The efforts of an expert to parallelize and optimize a
dense linear algebra algorithm for distributed-memory
targets are largely mechanical and repetitive. We
demonstrate that these efforts can be encoded and
automatically applied to obviate the manual
implementation of many algorithms in high-performance
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Nugteren:2012:BHM,
author = "Cedric Nugteren and Henk Corporaal",
title = "The boat hull model: adapting the roofline model to
enable performance prediction for parallel computing",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "291--292",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145859",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Multi-core and many-core were already major trends for
the past six years, and are expected to continue for
the next decades. With these trends of parallel
computing, it becomes increasingly difficult to decide
on which architecture to run a given application. In
this work, we use an algorithm classification to
predict performance prior to algorithm implementation.
For this purpose, we modify the roofline model to
include class information. In this way, we enable
architectural choice through performance prediction
prior to the development of architecture specific code.
The new model, the boat hull model, is demonstrated
using a GPU as a target architecture. We show for 6
example algorithms that performance is predicted
accurately without requiring code to be available.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Feng:2012:SPG,
author = "Min Feng and Rajiv Gupta and Laxmi N. Bhuyan",
title = "Speculative parallelization on {GPGPUs}",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "293--294",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145860",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "This paper overviews the first speculative
parallelization technique for GPUs that can exploit
parallelism in loops even in the presence of dynamic
irregularities that may give rise to cross-iteration
dependences. The execution of a speculatively
parallelized loop consists of five phases: scheduling,
computation, misspeculation check, result committing,
and misspeculation recovery. We perform misspeculation
check on the GPU to minimize its cost. We optimize the
procedures of result committing and misspeculation
recovery to reduce the result copying and recovery
overhead. Finally, the scheduling policies are designed
according to the types of cross-iteration dependences
to reduce the misspeculation rate. Our preliminary
evaluation was conducted on an nVidia Tesla C1060
hosted in an Intel(R) Xeon(R) E5540 machine. We use
                 three benchmarks, of which two contain irregular memory
                 accesses and one contains irregular control flow that
can give rise to cross-iteration dependences. Our
implementation achieves 3.6x-13.8x speedups for loops
in these benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jimborean:2012:APM,
author = "Alexandra Jimborean and Philippe Clauss and
Beno{\^\i}t Pradelle and Luis Mastrangelo and Vincent
Loechner",
title = "Adapting the polyhedral model as a framework for
efficient speculative parallelization",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "295--296",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145861",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "In this paper, we present a Thread-Level Speculation
(TLS) framework whose main feature is to be able to
speculatively parallelize a sequential loop nest in
various ways, by re-scheduling its iterations. The
transformation to be applied is selected at runtime
with the goal of minimizing the number of rollbacks and
maximizing performance. We perform code transformations
by applying the polyhedral model that we adapted for
speculative and runtime code parallelization. For this
purpose, we design a parallel code pattern which is
patched by our runtime system according to the
profiling information collected on some execution
                 samples. Adaptability is ensured by considering chunks
                 of code of various sizes that are launched
                 successively, each of which is parallelized in a
                 different manner, or run sequentially, depending on the
                 currently observed memory-access behavior. We
show on several benchmarks that our framework yields
good performance on codes which could not be handled
efficiently by previously proposed TLS systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gong:2012:OCN,
author = "Yifan Gong and Bingsheng He and Jianlong Zhong",
title = "An overview of {CMPI}: network performance aware {MPI}
in the cloud",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "297--298",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145862",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Cloud computing enables users to perform distributed
computing tasks on many virtual machines, without
owning a physical cluster. Recently, various
distributed computing tasks such as scientific
applications are being moved from supercomputers and
private clusters to public clouds. Message passing
interface (MPI) is a key and common component in
distributed computing tasks. The virtualized computing
environment of the public cloud hides the network
topology information from the users, and existing
topology-aware optimizations for MPI are no longer
feasible in the cloud environment. We propose a network
performance aware MPI library named CMPI. CMPI embraces
a new model for capturing the network performance among
different virtual machines in the cloud. Based on the
network performance model, we develop novel network
performance aware algorithms for communication
operations. This poster gives an overview of CMPI
design, and presents some preliminary results on
                 collective operations such as broadcast. We demonstrate
the effectiveness of our network performance aware
optimizations on Amazon EC2.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kim:2012:OUP,
author = "Jungwon Kim and Sangmin Seo and Jun Lee and Jeongho
Nah and Gangwon Jo and Jaejin Lee",
title = "{OpenCL} as a unified programming model for
heterogeneous {CPU\slash GPU} clusters",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "299--300",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145863",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "In this paper, we propose an OpenCL framework for
heterogeneous CPU/GPU clusters, and show that the
framework achieves both high performance and ease of
programming. The framework provides an illusion of a
single system for the user. It allows the application
to utilize multiple heterogeneous compute devices, such
as multicore CPUs and GPUs, in a remote node as if they
were in a local node. No communication API, such as the
MPI library, is required in the application source. We
implement the OpenCL framework and evaluate its
performance on a heterogeneous CPU/GPU cluster that
consists of one host node and nine compute nodes using
eleven OpenCL benchmark applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tzenakis:2012:BBL,
author = "George Tzenakis and Angelos Papatriantafyllou and John
Kesapides and Polyvios Pratikakis and Hans
Vandierendonck and Dimitrios S. Nikolopoulos",
title = "{BDDT}: block-level dynamic dependence analysis for
deterministic task-based parallelism",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "301--302",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145864",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kamil:2012:PPP,
author = "Shoaib Kamil and Derrick Coetzee and Scott Beamer and
Henry Cook and Ekaterina Gonina and Jonathan Harper and
Jeffrey Morlan and Armando Fox",
title = "Portable parallel performance from sequential,
productive, embedded domain-specific languages",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "303--304",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145865",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Domain-expert productivity programmers desire scalable
application performance, but usually must rely on
efficiency programmers who are experts in explicit
parallel programming to achieve it. Since such
programmers are rare, to maximize reuse of their work
we propose encapsulating their strategies in
mini-compilers for domain-specific embedded languages
(DSELs) glued together by a common high-level host
language familiar to productivity programmers. The
                 nontrivial applications that use these DSELs achieve up
                 to 98\% of peak attainable performance, comparable to
                 or better than existing hand-coded implementations.
Our approach is unique in that each mini-compiler not
only performs conventional compiler transformations and
optimizations, but includes imperative procedural code
that captures an efficiency expert's strategy for
mapping a narrow domain onto a specific type of
hardware. The result is source- and
performance-portability for productivity programmers
and parallel performance that rivals that of hand-coded
efficiency-language implementations of the same
applications. We describe a framework that supports our
methodology and five implemented DSELs supporting
common computation kernels. Our results demonstrate
that for several interesting classes of problems,
efficiency-level parallel performance can be achieved
by packaging efficiency programmers' expertise in a
reusable framework that is easy to use for both
productivity programmers and efficiency programmers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hoefler:2012:CCO,
author = "Torsten Hoefler and Timo Schneider",
title = "Communication-centric optimizations by dynamically
detecting collective operations",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "305--306",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145866",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The steady increase of parallelism in high-performance
computing platforms implies that communication will be
most important in large-scale applications. In this
work, we tackle the problem of transparent optimization
of large-scale communication patterns using online
compilation techniques. We utilize the Group Operation
Assembly Language (GOAL), an abstract parallel dataflow
definition language, to specify our transformations in
a device-independent manner. We develop fast schemes
that analyze dataflow and synchronization semantics in
                 GOAL and detect whether parts of the communication
                 pattern (or the whole pattern) express a known
                 collective communication operation. The detection of collective
operations allows us to replace the detected patterns
with highly optimized algorithms or low-level hardware
calls and thus improve performance significantly.
Benchmark results suggest that our technique can lead
to a performance improvement of orders of magnitude
compared with various optimized algorithms written in
Co-Array Fortran. Detecting collective operations also
improves the programmability of parallel languages in
that the user does not have to understand the detailed
semantics of high-level communication operations in
order to generate efficient and scalable code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhang:2012:LLF,
author = "Donghui Zhang and Per-{\AA}ke Larson",
title = "{LHlf}: lock-free linear hashing (poster paper)",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "307--308",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145868",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "LHlf is a new hash table designed to allow very high
levels of concurrency. The table is lock free and grows
                 and shrinks automatically according to the number of
items in the table. Insertions, lookups and deletions
are never blocked. LHlf is based on linear hashing but
adopts recursive split-ordering of the items within a
bucket to be able to split and merge lists in a lock
free manner. LHlf is as fast as the best previous
lock-free design and in addition it offers stable
performance, uses less space, and supports both
expansions and contractions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Timnat:2012:WFL,
author = "Shahar Timnat and Anastasia Braginsky and Alex Kogan
and Erez Petrank",
title = "Wait-free linked-lists",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "309--310",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145869",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The linked-list data structure is fundamental and
ubiquitous. Lock-free versions of the linked-list are
well known. However, the existence of a practical
                 wait-free linked-list has been an open question. In
                 this work we designed such a linked-list. To achieve better
performance, we have also extended this design using
the fast-path-slow-path methodology. The resulting
implementation achieves performance which is
competitive with that of Harris's lock-free list, while
still guaranteeing non-starvation via wait-freedom. We
have also developed a proof for the correctness and the
wait-freedom of our design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dinh:2012:SPD,
author = "Minh Ngoc Dinh and David Abramson and Chao Jin and
Andrew Gontarek and Bob Moench and Luiz DeRose",
title = "Scalable parallel debugging with statistical
assertions",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "311--312",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145870",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Traditional debuggers are of limited value for modern
scientific codes that manipulate large complex data
structures. This paper discusses a novel debug-time
assertion, called a ``Statistical Assertion'', that
                 allows a user to reason about large data structures;
                 its primitives are parallelised to provide an
efficient solution. We present the design and
implementation of statistical assertions, and
illustrate the debugging technique with a molecular
dynamics simulation. We evaluate the performance of the
                 tool on a 12,000-core Cray XE6.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Malkis:2012:VSB,
author = "Alexander Malkis and Anindya Banerjee",
title = "Verification of software barriers",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "313--314",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145871",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "This paper describes frontiers in verification of the
software barrier synchronization primitive. So far most
software barrier algorithms have not been mechanically
verified. We show preliminary results in automatically
proving the correctness of the major software
barriers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mittal:2012:CAS,
author = "Anshul Mittal and Nikhil Jain and Thomas George and
Yogish Sabharwal and Sameer Kumar",
title = "Collective algorithms for sub-communicators",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "315--316",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145872",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Collective communication over a group of processors is
an integral and time consuming component in many HPC
applications. Many modern day supercomputers are based
on torus interconnects. On such systems, for an
irregular communicator comprising of a subset of
processors, the algorithms developed so far are not
contention free in general and hence non-optimal. In
this paper, we present a novel contention-free
algorithm to perform collective operations over a
subset of processors in a torus network. We also extend
previous work on regular communicators to handle
special cases of irregular communicators that occur
frequently in parallel scientific applications. For the
generic case where multiple node disjoint
sub-communicators communicate simultaneously in a
loosely synchronous fashion, we propose a novel
cooperative approach to route the data for individual
sub-communicators without contention. Empirical results
demonstrate that our algorithms outperform the
optimized MPI collective implementation on IBM's Blue
Gene/P supercomputer for large data sizes and random
node distributions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{DeKoster:2012:SVE,
author = "Joeri {De Koster} and Stefan Marr and Theo D'Hondt",
title = "Synchronization views for event-loop actors",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "317--318",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145873",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The actor model has already proven itself as an
interesting concurrency model that avoids issues such
as deadlocks and race conditions by construction, and
thus facilitates concurrent programming. The tradeoff
                 is that it sacrifices expressiveness and efficiency,
                 especially with respect to data parallelism. However,
many standard solutions to computationally expensive
problems employ data parallel algorithms for better
performance on parallel systems. We identified three
problems that inhibit the use of data-parallel
algorithms within the actor model. Firstly, one of the
main properties of the actor model, the fact that no
data is shared, is one of the most severe performance
bottlenecks. Especially the fact that shared state can
not be read truly in parallel. Secondly, the actor
model on its own does not provide a mechanism to
specify extra synchronization conditions on batches of
messages which leads to event-level data-races. And
lastly, programmers are forced to write code in a
continuation-passing style (CPS) to handle typical
request-response situations. However, CPS breaks the
sequential flow of the code and is often hard to
understand, which increases complexity and lowers
maintainability. We proposes synchronization views to
solve these three issues without compromising the
semantic properties of the actor model. Thus, the
resulting concurrency model maintains deadlock-freedom,
avoids low-level race conditions, and keeps the
semantics of macro-step execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Metreveli:2012:CCP,
author = "Zviad Metreveli and Nickolai Zeldovich and M. Frans
Kaashoek",
title = "{CPHASH}: a cache-partitioned hash table",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "319--320",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145874",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "CPHash is a concurrent hash table for multicore
processors. CPHash partitions its table across the
caches of cores and uses message passing to transfer
lookups\slash inserts to a partition. CPHash's message
passing avoids the need for locks, pipelines batches of
asynchronous messages, and packs multiple messages into
                 a single cache line transfer. Experiments on an 80-core
machine with 2 hardware threads per core show that
CPHash has $ \approx 1.6 \times $ higher throughput
than a hash table implemented using fine-grained locks.
An analysis shows that CPHash wins because it
experiences fewer cache misses and its cache misses are
less expensive, because of less contention for the
on-chip interconnect and DRAM. CPServer, a key\slash
value cache server using CPHash, achieves $ \approx 5
\% $ higher throughput than a key\slash value cache
server that uses a hash table with fine-grained locks,
but both achieve better throughput and scalability than
                 memcached. The throughputs of CPHash and CPServer also
                 scale near-linearly with the number of cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wernsing:2012:RHA,
author = "John R. Wernsing and Greg Stitt",
title = "{RACECAR}: a heuristic for automatic function
specialization on multi-core heterogeneous systems",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "321--322",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145875",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "High-performance computing systems increasingly
combine multi-core processors and heterogeneous
resources such as graphics-processing units and
field-programmable gate arrays. However, significant
application design complexity for such systems has
often led to untapped performance potential.
Application designers targeting such systems currently
must determine how to parallelize computation, create
device-specialized implementations for each
heterogeneous resource, and determine how to partition
work for each resource. In this paper, we present the
RACECAR heuristic to automate the optimization of
applications for multi-core heterogeneous systems by
automatically exploring implementation alternatives
that include different algorithms, parallelization
strategies, and work distributions. Experimental
                 results show that RACECAR-specialized implementations
                 achieve speedups of up to 117x, averaging 11x, compared
                 to a single CPU thread when parallelizing computation
across multiple cores, graphics-processing units, and
field-programmable gate arrays.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liu:2012:LFA,
author = "Yujie Liu and Michael Spear",
title = "A lock-free, array-based priority queue",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "323--324",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145876",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Noll:2012:IDO,
author = "Albert Noll and Thomas R. Gross",
title = "An infrastructure for dynamic optimization of parallel
programs",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "325--326",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145877",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Object-oriented programming languages like Java
provide only low-level constructs (e.g., starting a
thread) to describe concurrency. High-level
abstractions (e.g., thread pools) are merely provided
as a library. As a result, a compiler is not aware of
the high-level semantics of a parallel library and
therefore misses important optimization opportunities.
                 This paper presents a simple source-language extension
                 that gives a compiler the opportunity to perform new
                 optimizations that are particularly effective for
                 parallel code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kjolstad:2012:ADG,
author = "Fredrik Kjolstad and Torsten Hoefler and Marc Snir",
title = "Automatic datatype generation and optimization",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "327--328",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145878",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Many high performance applications spend considerable
time packing noncontiguous data into contiguous
communication buffers. MPI Datatypes provide an
alternative by describing noncontiguous data layouts.
This allows sophisticated hardware to retrieve data
directly from application data structures. However,
packing codes in real-world applications are often
complex and specifying equivalent datatypes is
difficult, time-consuming, and error prone. We present
an algorithm that automates the transformation. We have
implemented the algorithm in a tool that transforms
packing code to MPI Datatypes, and evaluated it by
transforming 90 packing codes from the NAS Parallel
Benchmarks. The transformation allows easy porting of
applications to new machines that benefit from
datatypes, thus improving programmer productivity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Burnim:2012:NIN,
author = "Jacob Burnim and Tayfun Elmas and George Necula and
Koushik Sen",
title = "{NDetermin}: inferring nondeterministic sequential
specifications for parallelism correctness",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "329--330",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145879",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Nondeterministic Sequential (NDSeq) specifications
have been proposed as a means for separating the
testing, debugging, and verifying of a program's
parallelism correctness and its sequential functional
correctness. In this work, we present a technique that,
given a few representative executions of a parallel
program, combines dynamic data flow analysis and
Minimum-Cost Boolean Satisfiability (MinCostSAT)
solving for automatically inferring a likely NDSeq
specification for the parallel program. For a number of
Java benchmarks, our tool NDetermin infers equivalent
or stronger NDSeq specifications than those previously
written manually.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Park:2012:CB,
author = "Chang-Seo Park and Koushik Sen",
title = "Concurrent breakpoints",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "331--332",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145880",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "In program debugging, reproducibility of bugs is a key
requirement. Unfortunately, bugs in concurrent programs
are notoriously difficult to reproduce because bugs due
to concurrency happen under very specific thread
schedules and the likelihood of taking such corner-case
schedules during regular testing is very low. We
propose concurrent breakpoints, a light-weight and
programmatic way to make a concurrency bug
reproducible. We describe a mechanism that helps to hit
a concurrent breakpoint in a concurrent execution with
high probability. We have implemented concurrent
breakpoints as a light-weight library for Java and
C/C++ programs. We have used the implementation to
deterministically reproduce several known
non-deterministic bugs in real-world concurrent Java
and C/C++ programs with almost 100\% probability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Stone:2012:EMP,
author = "Andrew Stone and John Dennis and Michelle Strout",
title = "Establishing a {Miniapp} as a programmability proxy",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "333--334",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145881",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Miniapps serve as test beds for prototyping and
evaluating new algorithms, data structures, and
programming models before incorporating such changes
into larger applications. For the miniapp to accurately
predict how a prototyped change would affect a larger
application it is necessary that the miniapp be shown
to serve as a proxy for that larger application.
Although many benchmarks claim to proxy the performance
for a set of large applications, little work has
explored what criteria must be met for a benchmark to
serve as a proxy for examining programmability. In this
poster we describe criteria that can be used to
establish that a miniapp serves as a performance and
programmability proxy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jiang:2012:OSP,
author = "Lei Jiang and Pragneshkumar B. Patel and George
Ostrouchov and Ferdinand Jamitzky",
title = "{OpenMP}-style parallelism in data-centered multicore
computing with {R}",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "335--336",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145882",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "R$^1$ is a domain specific language widely used for
data analysis by the statistics community as well as by
researchers in finance, biology, social sciences, and
many other disciplines. As R programs are linked to
input data, the exponential growth of available data
makes high-performance computing with R imperative. To
ease the process of writing parallel programs in R,
code transformation from a sequential program to a
parallel version would bring much convenience to R
users. In this paper, we present our work in
semi-automatic parallelization of R codes with
user-added OpenMP-style pragmas. While such pragmas are
used at the frontend, we take advantage of multiple
parallel backends with different R packages. We provide
flexibility for importing parallelism with plug-in
components, impose built-in MapReduce for data
processing, and also maintain code reusability. We
                 illustrate the advantage of the on-the-fly mechanisms,
                 which can lead to significant applications in
data-centered parallel computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Caniou:2012:PAP,
author = "Yves Caniou and Daniel Diaz and Florian Richoux and
Philippe Codognet and Salvador Abreu",
title = "Performance analysis of parallel constraint-based
local search",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "337--338",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145883",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "We present a parallel implementation of a
constraint-based local search algorithm and investigate
its performance results for hard combinatorial
optimization problems on two different platforms up to
several hundreds of cores. On a variety of classical
CSPs benchmarks, speedups are very good for a few tens
of cores, and good up to a hundred cores. More
challenging problems derived from real-life
                 applications (Costas array) show even better speedups,
nearly optimal up to 256 cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Thiemann:2012:ACE,
author = "Peter Thiemann",
title = "{AGDA}-curious?: an exploration of programming with
dependent types",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "1--2",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364529",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "I explore programming with the dependently typed
functional language, AGDA. I present the progress which
AGDA has made, demonstrate its usage in a small
development, reflect critically on the state of the
art, and speculate about the way ahead. I do not seek
to persuade you to adopt AGDA as your primary tool for
systems development, but argue that AGDA stimulates new
useful ways to think about programming problems and
deserves not just curiosity but interest, support and
contribution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Stewart:2012:VHT,
author = "Gordon Stewart and Lennart Beringer and Andrew W.
Appel",
title = "Verified heap theorem prover by paramodulation",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "3--14",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present VeriStar, a verified theorem prover for a
decidable subset of separation logic. Together with
VeriSmall [3], a proved-sound Smallfoot-style program
analysis for C minor, VeriStar demonstrates that fully
machine-checked static analyses equipped with efficient
theorem provers are now within the reach of formal
methods. As a pair, VeriStar and VeriSmall represent
the first application of the Verified Software
Toolchain [4], a tightly integrated collection of
machine-verified program logics and compilers giving
foundational correctness guarantees. VeriStar is (1)
purely functional, (2) machine-checked, (3) end-to-end,
(4) efficient and (5) modular. By purely functional, we
mean it is implemented in Gallina, the pure functional
programming language embedded in the Coq theorem
prover. By machine-checked, we mean it has a proof in
Coq that when the prover says ``valid'', the checked
entailment holds in a proved-sound separation logic for
C minor. By end-to-end, we mean that when the static
analysis+theorem prover says a C minor program is safe,
the program will be compiled to a semantically
equivalent assembly program that runs on real hardware.
By efficient, we mean that the prover implements a
state-of-the-art algorithm for deciding heap
entailments and uses highly tuned verified functional
data structures. By modular, we mean that VeriStar can
be retrofitted to other static analyses as a
plug-compatible entailment checker and its soundness
proof can easily be ported to other separation
logics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Huffman:2012:FVM,
author = "Brian Huffman",
title = "Formal verification of monad transformers",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "15--16",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364532",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present techniques for reasoning about constructor
classes that (like the monad class) fix polymorphic
operations and assert polymorphic axioms. We do not
require a logic with first-class type constructors,
first-class polymorphism, or type quantification;
instead, we rely on a domain-theoretic model of the
type system in a universal domain to provide these
features. These ideas are implemented in the Tycon
library for the Isabelle theorem prover, which builds
on the HOLCF library of domain theory. The Tycon
library provides various axiomatic type constructor
classes, including functors and monads. It also
provides automation for instantiating those classes,
and for defining further subclasses. We use the Tycon
library to formalize three Haskell monad transformers:
the error transformer, the writer transformer, and the
resumption transformer. The error and writer
transformers do not universally preserve the monad
laws; however, we establish datatype invariants for
each, showing that they are valid monads when viewed as
abstract datatypes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
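
The Huffman entry above formalises monad transformers in Isabelle/HOLCF; as a
plain-Haskell illustration only (not the paper's formalisation), the writer
transformer below shows why its monad laws hinge on the monoid laws of the
log type w, the kind of side condition the paper's invariants make precise.

    -- Minimal sketch, assuming only base Haskell; not the paper's
    -- Isabelle/HOLCF development.
    import Control.Monad (ap, liftM)

    newtype WriterT w m a = WriterT { runWriterT :: m (a, w) }

    instance (Monoid w, Monad m) => Functor (WriterT w m) where
      fmap = liftM

    instance (Monoid w, Monad m) => Applicative (WriterT w m) where
      pure a = WriterT (return (a, mempty))
      (<*>)  = ap

    instance (Monoid w, Monad m) => Monad (WriterT w m) where
      WriterT m >>= k = WriterT $ do
        (a, w)  <- m
        (b, w') <- runWriterT (k a)
        -- the monad identity laws hold only because mempty is a
        -- unit for <> and <> is associative
        return (b, w <> w')
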
@Article{Dunfield:2012:EIU,
author = "Joshua Dunfield",
title = "Elaborating intersection and union types",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "17--28",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Designing and implementing typed programming languages
is hard. Every new type system feature requires
extending the metatheory and implementation, which are
often complicated and fragile. To ease this process, we
would like to provide general mechanisms that subsume
many different features. In modern type systems,
parametric polymorphism is fundamental, but
intersection polymorphism has gained little traction in
programming languages. Most practical intersection type
systems have supported only refinement intersections,
which increase the expressiveness of types (more
precise properties can be checked) without altering the
expressiveness of terms; refinement intersections can
simply be erased during compilation. In contrast,
unrestricted intersections increase the expressiveness
of terms, and can be used to encode diverse language
features, promising an economy of both theory and
implementation. We describe a foundation for compiling
unrestricted intersection and union types: an
elaboration type system that generates ordinary $
\lambda $-calculus terms. The key feature is a
Forsythe-like merge construct. With this construct, not
all reductions of the source program preserve types;
however, we prove that ordinary call-by-value
evaluation of the elaborated program corresponds to a
type-preserving evaluation of the source program. We
also describe a prototype implementation and
applications of unrestricted intersections and unions:
records, operator overloading, and simulating dynamic
typing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Chen:2012:ETT,
author = "Sheng Chen and Martin Erwig and Eric Walkingshaw",
title = "An error-tolerant type system for variational lambda
calculus",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "29--40",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364535",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Conditional compilation and software product line
technologies make it possible to generate a huge number
of different programs from a single software project.
Typing each of these programs individually is usually
impossible due to the sheer number of possible
variants. Our previous work has addressed this problem
with a type system for variational lambda calculus
(VLC), an extension of lambda calculus with basic
constructs for introducing and organizing variation.
Although our type inference algorithm is more efficient
than the brute-force strategy of inferring the types of
each variant individually, it is less robust since type
inference will fail for the entire variational
expression if any one variant contains a type error. In
this work, we extend our type system to operate on VLC
expressions containing type errors. This extension
directly supports locating ill-typed variants and the
incremental development of variational programs. It
also has many subtle implications for the unification
of variational types. We show that our extended type
system possesses a principal typing property and that
the underlying unification problem is unitary. Our
unification algorithm computes partial unifiers that
lead to result types that (1) contain errors in as few
variants as possible and (2) are most general. Finally,
we perform an empirical evaluation to determine the
overhead of this extension compared to our previous
work, to demonstrate the improvements over the
brute-force approach, and to explore the effects of
various error distributions on the inference process.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
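
To make the Chen, Erwig, and Walkingshaw entry above concrete: the heart of a
variational calculus is a choice construct that keeps all variants in one
term. The sketch below is a made-up miniature in Haskell, not the paper's VLC
or its error-tolerant inference; it only shows choices and variant selection.

    -- Hypothetical miniature of variational values; Dim, V, and
    -- select are illustration names, not the paper's definitions.
    type Dim = String

    data V a = Plain a                -- an ordinary value
             | Chc Dim (V a) (V a)    -- a named binary choice
      deriving Show

    -- Resolve one dimension to its left or right alternative.
    select :: Dim -> Bool -> V a -> V a
    select _ _ v@(Plain _) = v
    select d left (Chc d' l r)
      | d == d'   = select d left (if left then l else r)
      | otherwise = Chc d' (select d left l) (select d left r)

For example, select "Impl" True (Chc "Impl" (Plain 1) (Plain 2)) yields
Plain 1; type checking all variants at once is what the paper's system does
without enumerating such selections.
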
@Article{Krishnaswami:2012:SST,
author = "Neelakantan R. Krishnaswami and Aaron Turon and Derek
Dreyer and Deepak Garg",
title = "Superficially substructural types",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "41--54",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364536",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many substructural type systems have been proposed for
controlling access to shared state in higher-order
languages. Central to these systems is the notion of a
*resource*, which may be split into disjoint pieces
that different parts of a program can manipulate
independently without worrying about interfering with
one another. Some systems support a *logical* notion of
resource (such as permissions), under which two
resources may be considered disjoint even if they
govern the *same* piece of state. However, in nearly
all existing systems, the notions of resource and
disjointness are fixed at the outset, baked into the
model of the language, and fairly coarse-grained in the
kinds of sharing they enable. In this paper, inspired
by recent work on ``fictional disjointness'' in
separation logic, we propose a simple and flexible way
of enabling any module in a program to create its own
custom type of splittable resource (represented as a
commutative monoid), thus providing fine-grained
control over how the module's private state is shared
with its clients. This functionality can be
incorporated into an otherwise standard substructural
type system by means of a new typing rule we call *the
sharing rule*, whose soundness we prove semantically
via a novel resource-oriented Kripke logical
relation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Mitchell:2012:SBB,
author = "Neil Mitchell",
title = "Shake before building: replacing {\tt make} with
{Haskell}",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "55--66",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364538",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most complex software projects are compiled using a
build tool (e.g. make), which runs commands in an order
satisfying user-defined dependencies. Unfortunately,
most build tools require all dependencies to be
specified before the build starts. This restriction
makes many dependency patterns difficult to express,
especially those involving files generated at build
time. We show how to eliminate this restriction,
allowing additional dependencies to be specified while
building. We have implemented our ideas in the Haskell
library Shake, and have used Shake to write a complex
build system which compiles millions of lines of
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
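
The Mitchell entry above centres on declaring dependencies while a rule is
already running. A minimal usage sketch of Shake's published basics
(shakeArgs, want, %>, need) follows; the file names are made up for
illustration.

    -- Sketch only: the monadic need call records a dependency
    -- *during* the build, which make-style tools cannot express.
    import Development.Shake

    main :: IO ()
    main = shakeArgs shakeOptions $ do
      want ["result.txt"]

      "result.txt" %> \out -> do
        -- dependency declared after the rule has started running
        need ["input.txt"]
        contents <- liftIO (readFile "input.txt")
        liftIO (writeFile out (reverse contents))
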
@Article{Chitil:2012:PTL,
author = "Olaf Chitil",
title = "Practical typed lazy contracts",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "67--76",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Until now there has been no support for specifying and
enforcing contracts within a lazy functional program.
That is a shame, because contracts consist of pre- and
post-conditions for functions that go beyond the
standard static types. This paper presents the design
and implementation of a small, easy-to-use, purely
functional contract library for Haskell, which, when a
contract is violated, also provides more useful
information than the classical blaming of one contract
partner. From now on lazy functional languages can
profit from the assurances in the development of
correct programs that contracts provide.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
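
As a rough flavour of the Chitil entry above, the fragment below sketches
typed contracts as function wrappers in plain Haskell. It checks eagerly and
gives crude blame, whereas the paper's library is lazy and reports far more
useful information on violation; all names here are illustrative.

    -- Hypothetical miniature, not the paper's library.
    type Contract a = a -> a

    prop :: (a -> Bool) -> Contract a
    prop p x = if p x then x else error "contract violated"

    -- A contract for functions: check the precondition on the
    -- argument and the postcondition on the result.
    fun :: Contract a -> Contract b -> Contract (a -> b)
    fun pre post f = post . f . pre

    -- Example: head with a non-emptiness precondition.
    head' :: [Int] -> Int
    head' = fun (prop (not . null)) (prop (const True)) head
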
@Article{Oliveira:2012:FPS,
author = "Bruno C.d.S. Oliveira and William R. Cook",
title = "Functional programming with structured graphs",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "77--88",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364541",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a new functional programming model
for graph structures called structured graphs.
Structured graphs extend conventional algebraic
datatypes with explicit definition and manipulation of
cycles and/or sharing, and offer a practical and
convenient way to program graphs in functional
programming languages like Haskell. The representation
of sharing and cycles (edges) employs recursive binders
and uses an encoding inspired by parametric
higher-order abstract syntax. Unlike traditional
approaches based on mutable references or node/edge
lists, well-formedness of the graph structure is
ensured statically and reasoning can be done with
standard functional programming techniques. Since the
binding structure is generic, we can define many useful
generic combinators for manipulating structured graphs.
We give applications and show how to reason about
structured graphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
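
The Oliveira and Cook entry above represents cycles with recursive binders in
parametric HOAS style. The following is a minimal sketch of that idea for
streams of Ints, with made-up names; it is not the paper's library.

    {-# LANGUAGE RankNTypes #-}
    -- Cycles as binders: Mu introduces a node that Var can refer
    -- back to, so sharing is part of the term, not a side effect.
    data Stream v = Var v                -- back-reference to a node
                  | Mu (v -> Stream v)   -- introduce a cycle
                  | Cons Int (Stream v)  -- a data edge

    newtype Graph = Graph (forall v. Stream v)

    -- Unfold a cyclic stream into its infinite list of elements.
    toList :: Graph -> [Int]
    toList (Graph g) = go g
      where
        go :: Stream [Int] -> [Int]
        go (Var xs)   = xs
        go (Mu f)     = let xs = go (f xs) in xs  -- tie the knot
        go (Cons n s) = n : go s

    -- take 5 (toList (Graph (Mu (\v -> Cons 1 (Cons 2 (Var v))))))
    --   == [1,2,1,2,1]
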
@Article{Sheard:2012:PPC,
author = "Timothy E. Sheard",
title = "Painless programming combining reduction and search:
design principles for embedding decision procedures in
high-level languages",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "89--102",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364542",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe the Funlogic system which extends a
functional language with existentially quantified
declarations. An existential declaration introduces a
variable and a set of constraints that its value should
meet. Existential variables are bound to conforming
values by a decision procedure. Funlogic embeds
multiple external decision procedures using a common
framework. Design principles for embedding decision
procedures are developed and illustrated for three
different decision procedures from widely varying
domains.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Dagand:2012:TFA,
author = "Pierre-Evariste Dagand and Conor McBride",
title = "Transporting functions across ornaments",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "103--114",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364544",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming with dependent types is a blessing and a
curse. It is a blessing to be able to bake invariants
into the definition of datatypes: we can finally write
correct-by-construction software. However, this extreme
accuracy is also a curse: a datatype is the combination
of a structuring medium together with a special purpose
logic. These domain-specific logics hamper any effort
of code reuse among similarly structured data. In this
paper, we exorcise our datatypes by adapting the notion
of ornament to our universe of inductive families. We
then show how code reuse can be achieved by ornamenting
functions. Using these functional ornaments, we capture
the relationship between functions such as the addition
of natural numbers and the concatenation of lists. With
this knowledge, we demonstrate how the implementation
of the former informs the implementation of the latter:
the user can ask the definition of addition to be
lifted to lists and she will only be asked the details
necessary to carry on adding lists rather than numbers.
Our presentation is formalised in a type theory with a
universe of datatypes and all our constructions have
been implemented as generic programs, requiring no
extension to the type theory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
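
The relationship the Dagand and McBride entry above captures, between natural
number addition and list concatenation, can be seen in plain Haskell: erase
the list elements from append and the addition on Nat reappears. The paper
makes this precise and generic inside a type theory; the snippet below is
only the motivating example.

    data Nat = Z | S Nat

    add :: Nat -> Nat -> Nat
    add Z     n = n
    add (S m) n = S (add m n)

    -- Each case of append is the matching case of add, decorated
    -- ("ornamented") with the list element x.
    append :: [a] -> [a] -> [a]
    append []     ys = ys
    append (x:xs) ys = x : append xs ys

    len :: [a] -> Nat
    len = foldr (\_ n -> S n) Z

    -- Coherence: len (append xs ys) == add (len xs) (len ys)
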
@Article{Myreen:2012:PPS,
author = "Magnus O. Myreen and Scott Owens",
title = "Proof-producing synthesis of {ML} from higher-order
logic",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "115--126",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364545",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The higher-order logic found in proof assistants such
as Coq and various HOL systems provides a convenient
setting for the development and verification of pure
functional programs. However, to efficiently run these
programs, they must be converted (or ``extracted'') to
functional programs in a programming language such as
ML or Haskell. With current techniques, this step,
which must be trusted, relates similar looking objects
that have very different semantic definitions, such as
the set-theoretic model of a logic and the operational
semantics of a programming language. In this paper, we
show how to increase the trustworthiness of this step
with an automated technique. Given a functional program
expressed in higher-order logic, our technique provides
the corresponding program for a functional language
defined with an operational semantics, and it provides
a mechanically checked theorem relating the two. This
theorem can then be used to transfer verified
properties of the logical function to the program. We
have implemented our technique in the HOL4 theorem
prover, translating functions to a core subset of
Standard ML, and have applied it to examples including
functional data structures, a parser generator,
cryptographic algorithms, and a garbage collector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Danielsson:2012:OSU,
author = "Nils Anders Danielsson",
title = "Operational semantics using the partiality monad",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "127--138",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364546",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The operational semantics of a partial, functional
language is often given as a relation rather than as a
function. The latter approach is arguably more natural:
if the language is functional, why not take advantage
of this when defining the semantics? One can
immediately see that a functional semantics is
deterministic and, in a constructive setting,
computable. This paper shows how one can use the
coinductive partiality monad to define big-step or
small-step operational semantics for lambda-calculi and
virtual machines as total, computable functions (total
definitional interpreters). To demonstrate that the
resulting semantics are useful type soundness and
compiler correctness results are also proved. The
results have been implemented and checked using Agda, a
dependently typed programming language and proof
assistant.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
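
To make the Danielsson entry above concrete, here is the partiality (delay)
monad transcribed into Haskell. The paper works in Agda, where the
coinductive definition is actually checked for totality; this is just the
shape of the construction.

    -- A computation either returns now or takes one more step.
    data Delay a = Now a | Later (Delay a)

    instance Functor Delay where
      fmap f (Now a)   = Now (f a)
      fmap f (Later d) = Later (fmap f d)

    instance Applicative Delay where
      pure = Now
      Now f   <*> d = fmap f d
      Later f <*> d = Later (f <*> d)

    instance Monad Delay where
      Now a   >>= k = k a
      Later d >>= k = Later (d >>= k)

    -- Run for at most n steps; Nothing means "not yet terminated".
    runFor :: Int -> Delay a -> Maybe a
    runFor _ (Now a) = Just a
    runFor n (Later d)
      | n <= 0    = Nothing
      | otherwise = runFor (n - 1) d

    -- Divergence is a perfectly good total value of Delay:
    never :: Delay a
    never = Later never

An interpreter returning Delay Value is then a total function even for
programs that loop, which is what lets the semantics stay functional.
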
@Article{Olukotun:2012:HPE,
author = "Kunle Olukotun",
title = "High performance embedded domain specific languages",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "139--140",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364548",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today, all high-performance computer architectures are
parallel and heterogeneous: a combination of multiple
CPUs, GPUs and specialized processors. This creates a
complex programming problem for application developers.
Domain-specific languages (DSLs) are a promising
solution to this problem because they provide an avenue
for application-specific abstractions to be mapped
directly to low level architecture-specific programming
models providing high programmer productivity and high
execution performance. In this talk I will describe our
approach to building high performance DSLs, which is
based on embedding in Scala, light-weight modular
staging and a DSL infrastructure called Delite. I will
describe how we transform impure functional programs
into efficient first-order low-level code using domain
specific optimization, parallelism optimization,
locality optimization, scalar optimization, and
architecture-specific code generation. All
optimizations and transformations are implemented in an
extensible DSL compiler architecture that minimizes the
programmer effort required to develop a new DSL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Severi:2012:PTS,
author = "Paula G. Severi and Fer-Jan J. de Vries",
title = "Pure type systems with corecursion on streams: from
finite to infinitary normalisation",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "141--152",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364550",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we use types for ensuring that programs
involving streams are well-behaved. We extend pure type
systems with a type constructor for streams, a modal
operator next and a fixed point operator for expressing
corecursion. This extension is called Pure Type Systems
with Corecursion (CoPTS). The typed lambda calculus for
reactive programs defined by Krishnaswami and Benton
can be obtained as a CoPTS. CoPTSs allow us to study a
wide range of typed lambda calculi extended with
corecursion using only one framework. In particular, we
study this extension for the calculus of constructions
which is the underlying formal language of Coq. We use
the machinery of infinitary rewriting and formalise the
idea of well-behaved programs using the concept of
infinitary normalisation. The set of finite and
infinite terms is defined as a metric completion. We
establish a precise connection between the modal
operator ($ \circ A $) and the metric at a syntactic level by
relating a variable of type ($ \circ A $) with the depth of
all its occurrences in a term. This syntactic
connection between the modal operator and the depth is
the key to the proofs of infinitary weak and strong
normalisation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Endrullis:2012:CES,
author = "J{\"o}rg Endrullis and Dimitri Hendriks and Rena
Bakhshi",
title = "On the complexity of equivalence of specifications of
infinite objects",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "153--164",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364551",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study the complexity of deciding the equality of
infinite objects specified by systems of equations, and
of infinite objects specified by $ \lambda $-terms. For
equational specifications there are several natural
notions of equality: equality in all models, equality
of the sets of solutions, and equality of normal forms
for productive specifications. For $ \lambda $-terms
we investigate B{\"o}hm-tree equality and various
notions of observational equality. We pinpoint the
complexity of each of these notions in the arithmetical
or analytical hierarchy. We show that the complexity of
deciding equality in all models subsumes the entire
analytical hierarchy. This holds already for the most
simple infinite objects, viz. streams over $ \{ 0, 1 \}
$, and stands in sharp contrast to the low arithmetical
{$ \Pi^0_2 $}-completeness of equality of equationally
specified streams derived in [17] employing a different
notion of equality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Simoes:2012:AAA,
author = "Hugo Sim{\~o}es and Pedro Vasconcelos and M{\'a}rio
Florido and Steffen Jost and Kevin Hammond",
title = "Automatic amortised analysis of dynamic memory
allocation for lazy functional programs",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "165--176",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364575",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes the first successful attempt, of
which we are aware, to define an automatic, type-based
static analysis of resource bounds for lazy functional
programs. Our analysis uses the automatic amortisation
approach developed by Hofmann and Jost, which was
previously restricted to eager evaluation. In this
paper, we extend this work to a lazy setting by
capturing the costs of unevaluated expressions in type
annotations and by amortising the payment of these
costs using a notion of lazy potential. We present our
analysis as a proof system for predicting heap
allocations of a minimal functional language (including
higher-order functions and recursive data types) and
define a formal cost model based on Launchbury's
natural semantics for lazy evaluation. We prove the
soundness of our analysis with respect to the cost
model. Our approach is illustrated by a number of
representative and non-trivial examples that have been
analysed using a prototype implementation of our
analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Earl:2012:IPA,
author = "Christopher Earl and Ilya Sergey and Matthew Might and
David {Van Horn}",
title = "Introspective pushdown analysis of higher-order
programs",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "177--188",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364576",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the static analysis of functional programs,
pushdown flow analysis and abstract garbage collection
skirt just inside the boundaries of soundness and
decidability. Alone, each method reduces analysis times
and boosts precision by orders of magnitude. This work
illuminates and conquers the theoretical challenges
that stand in the way of combining the power of these
techniques. The challenge in marrying these techniques
is not subtle: computing the reachable control states
of a pushdown system relies on limiting access during
transition to the top of the stack; abstract garbage
collection, on the other hand, needs full access to the
entire stack to compute a root set, just as concrete
collection does. Introspective pushdown systems resolve
this conflict. Introspective pushdown systems provide
enough access to the stack to allow abstract garbage
collection, but they remain restricted enough to
compute control-state reachability, thereby enabling
the sound and precise product of pushdown analysis and
abstract garbage collection. Experiments reveal
synergistic interplay between the techniques, and the
fusion demonstrates ``better-than-both-worlds''
precision.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Launchbury:2012:ELT,
author = "John Launchbury and Iavor S. Diatchki and Thomas
DuBuisson and Andy Adams-Moran",
title = "Efficient lookup-table protocol in secure multiparty
computation",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "189--200",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364556",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Secure multiparty computation (SMC) permits a
collection of parties to compute a collaborative
result, without any of the parties gaining any
knowledge about the inputs provided by other parties.
Specifications for SMC are commonly presented as
boolean circuits, where optimizations come mostly from
reducing the number of multiply-operations (including
and-gates) --- these are the operations which incur
significant cost, either in computation overhead or in
communication between the parties. Instead, we take a
language-oriented approach, and consequently are able
to explore many other kinds of optimizations. We
present an efficient and general purpose SMC
table-lookup algorithm that can serve as a direct
alternative to circuits. Looking up a private (i.e.
shared, or encrypted) $ n $-bit argument in a public table
requires $ \log (n) $ parallel-and operations. We use the
advanced encryption standard algorithm (AES) as a
driving motivation, and by introducing different kinds
of parallelization techniques, produce the fastest
current SMC implementation of AES, improving the best
previously reported results by well over an order of
magnitude.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Stefan:2012:ACT,
author = "Deian Stefan and Alejandro Russo and Pablo Buiras and
Amit Levy and John C. Mitchell and David Mazi{\'e}res",
title = "Addressing covert termination and timing channels in
concurrent information flow systems",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "201--214",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364557",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When termination of a program is observable by an
adversary, confidential information may be leaked by
terminating accordingly. While this termination covert
channel has limited bandwidth for sequential programs,
it is a more dangerous source of information leakage in
concurrent settings. We address concurrent termination
and timing channels by presenting a dynamic
information-flow control system that mitigates and
eliminates these channels while allowing termination
and timing to depend on secret values. Intuitively, we
leverage concurrency by placing such potentially
sensitive actions in separate threads. While
termination and timing of these threads may expose
secret values, our system requires any thread observing
these properties to raise its information-flow label
accordingly, preventing leaks to lower-labeled
contexts. We implement this approach in a Haskell
library and demonstrate its applicability by building a
web server that uses information-flow control to
restrict untrusted web applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{zuSiederdissen:2012:SAC,
author = "Christian H{\"o}ner zu Siederdissen",
title = "Sneaking around {concatMap}: efficient combinators for
dynamic programming",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "215--226",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364559",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a framework of dynamic programming
combinators that provides a high-level environment to
describe the recursions typical of dynamic programming
over sequence data in a style very similar to algebraic
dynamic programming (ADP). Using a combination of
type-level programming and stream fusion leads to a
substantial increase in performance, without
sacrificing much of the convenience and theoretical
underpinnings of ADP. We draw examples from the field
of computational biology, more specifically RNA
secondary structure prediction, to demonstrate how to
use these combinators and what differences exist
between this library, ADP, and other approaches. The
final version of the combinator library allows writing
algorithms with performance close to hand-optimized C
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Daniels:2012:ERH,
author = "Noah M. Daniels and Andrew Gallant and Norman Ramsey",
title = "Experience report: {Haskell} in computational
biology",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "227--234",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364560",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Haskell gives computational biologists the flexibility
and rapid prototyping of a scripting language, plus the
performance of native code. In our experience,
higher-order functions, lazy evaluation, and monads
really worked, but profiling and debugging presented
obstacles. Also, Haskell libraries vary greatly:
memoization combinators and parallel-evaluation
strategies helped us a lot, but other, nameless
libraries mostly got in our way. Despite the obstacles
and the uncertain quality of some libraries, Haskell's
ecosystem made it easy for us to develop new algorithms
in computational biology.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Foltzer:2012:MSP,
author = "Adam Foltzer and Abhishek Kulkarni and Rebecca Swords
and Sajith Sasidharan and Eric Jiang and Ryan Newton",
title = "A meta-scheduler for the par-monad: composable
scheduling for the heterogeneous cloud",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "235--246",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364562",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern parallel computing hardware demands
increasingly specialized attention to the details of
scheduling and load balancing across heterogeneous
execution resources that may include GPU and cloud
environments, in addition to traditional CPUs. Many
existing solutions address the challenges of particular
resources, but do so in isolation, and in general do
not compose within larger systems. We propose a
general, composable abstraction for execution
resources, along with a continuation-based
meta-scheduler that harnesses those resources in the
context of a deterministic parallel programming library
for Haskell. We demonstrate performance benefits of
combined CPU/GPU scheduling over either alone, and of
combined multithreaded/distributed scheduling over
existing distributed programming approaches for
Haskell.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
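
For context on the Foltzer et al. entry above, the programmer-facing
interface of the underlying Par monad (the monad-par package) looks as
follows; the paper's meta-scheduler swaps the resource management underneath
without changing this surface. The example itself is made up.

    import Control.Monad.Par

    -- Sum the two halves of a list in parallel; IVars (via spawnP
    -- and get) synchronise the results deterministically.
    parSum :: [Int] -> Int
    parSum xs = runPar $ do
      let (l, r) = splitAt (length xs `div` 2) xs
      i <- spawnP (sum l)   -- evaluate the left half in parallel
      j <- spawnP (sum r)   -- and the right half
      a <- get i
      b <- get j
      return (a + b)
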
@Article{Bergstrom:2012:NDP,
author = "Lars Bergstrom and John Reppy",
title = "Nested data-parallelism on the {GPU}",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "247--258",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364563",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graphics processing units (GPUs) provide both memory
bandwidth and arithmetic performance far greater than
that available on CPUs but, because of their
Single-Instruction-Multiple-Data (SIMD) architecture,
they are hard to program. Most of the programs ported
to GPUs thus far use traditional data-level
parallelism, performing only operations that operate
uniformly over vectors. NESL is a first-order
functional language that was designed to allow
programmers to write irregular-parallel programs ---
such as parallel divide-and-conquer algorithms --- for
wide-vector parallel computers. This paper presents our
port of the NESL implementation to work on GPUs and
provides empirical evidence that nested
data-parallelism (NDP) on GPUs significantly
outperforms CPU-based implementations and matches or
beats newer GPU languages that support only flat
parallelism. While our performance does not match that
of hand-tuned CUDA programs, we argue that the
notational conciseness of NESL is worth the loss in
performance. This work provides the first language
implementation that directly supports NDP on a GPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Lippmeier:2012:WEH,
author = "Ben Lippmeier and Manuel M. T. Chakravarty and
Gabriele Keller and Roman Leshchinskiy and Simon Peyton
Jones",
title = "Work efficient higher-order vectorisation",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "259--270",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364564",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Existing approaches to higher-order vectorisation,
also known as flattening nested data parallelism, do
not preserve the asymptotic work complexity of the
source program. Straightforward examples, such as
sparse matrix-vector multiplication, can suffer a
severe blow-up in both time and space, which limits the
practicality of this method. We discuss why this
problem arises, identify the mis-handling of index
space transforms as the root cause, and present a
solution using a refined representation of nested
arrays. We have implemented this solution in Data
Parallel Haskell (DPH) and present benchmarks showing
that realistic programs, which used to suffer the
blow-up, now have the correct asymptotic work
complexity. In some cases, the asymptotic complexity of
the vectorised program is even better than the
original.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Sewell:2012:TJ,
author = "Peter Sewell",
title = "Tales from the jungle",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "271--272",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364566",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We rely on a computational infrastructure that is a
densely intertwined mass of software and hardware:
programming languages, network protocols, operating
systems, and processors. It has accumulated great
complexity, from a combination of engineering design
decisions, contingent historical choices, and sheer
scale, yet it is defined at best by prose
specifications, or, all too often, just by the common
implementations. Can we do better? More specifically,
can we apply rigorous methods to this mainstream
infrastructure, taking the accumulated complexity
seriously, and if we do, does it help? My colleagues
and I have looked at these questions in several
contexts: the TCP/IP network protocols with their
Sockets API; programming language design, including the
Java module system and the C11/C++11 concurrency model;
the hardware concurrency behaviour of x86, IBM POWER,
and ARM multiprocessors; and compilation of concurrent
code. In this talk I will draw some lessons from what
did and did not succeed, looking especially at the
empirical nature of some of the work, at the social
process of engagement with the various different
communities, and at the mathematical and software tools
we used. Domain-specific modelling languages (based on
functional programming ideas) and proof assistants were
invaluable for working with the large and loose
specifications involved: idioms within HOL4 for TCP,
our Ott tool for programming language specification,
and Owens's Lem tool for portable semantic definitions,
with HOL4, Isabelle, and Coq, for the relaxed-memory
concurrency semantics work. Our experience with these
suggests something of what is needed to make full-scale
rigorous semantics a commonplace reality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Wadler:2012:PS,
author = "Philip Wadler",
title = "Propositions as sessions",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "273--286",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364568",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Continuing a line of work by Abramsky (1994), by
Bellin and Scott (1994), and by Caires and Pfenning
(2010), among others, this paper presents CP, a
calculus in which propositions of classical linear
logic correspond to session types. Continuing a line of
work by Honda (1993), by Honda, Kubo, and Vasconcelos
(1998), and by Gay and Vasconcelos (2010), among
others, this paper presents GV, a linear functional
language with session types, and presents a translation
from GV into CP. The translation formalises for the
first time a connection between a standard presentation
of session types and linear logic, and shows how a
modification to the standard presentation yields a
language free from deadlock, where deadlock freedom
follows from the correspondence to linear logic.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Henry:2012:TUM,
author = "Gr{\'e}goire Henry and Michel Mauny and Emmanuel
Chailloux and Pascal Manoury",
title = "Typing unmarshalling without marshalling types",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "287--298",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Unmarshalling primitives in statically typed language
require, in order to preserve type safety, to
dynamically verify the compatibility between the
incoming values and the statically expected type. In
the context of programming languages based on
parametric polymorphism and uniform data
representation, we propose a relation of compatibility
between (unmarshalled) memory graphs and types. It is
defined as constraints over nodes of the memory graph.
Then, we propose an algorithm to check the
compatibility between a memory graph and a type. It is
described as a constraint solver based on a rewriting
system. We have shown that the proposed algorithm is
sound and semi-complete in presence of algebraic data
types, mutable data, polymorphic sharing, cycles, and
functional values; however, in its general form, it may
not terminate. We have implemented a prototype tailored
for the OCaml compiler [17] that always terminates and
still seems sufficiently complete in practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Jones:2012:DD,
author = "Will Jones and Tony Field and Tristan Allwood",
title = "Deconstraining {DSLs}",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "299--310",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364571",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Strongly-typed functional languages provide a powerful
framework for embedding Domain-Specific Languages
(DSLs). However, building type-safe functions defined
over an embedded DSL can introduce application-specific
type constraints that end up being imposed on the DSL
data types themselves. At best, these constraints are
unwieldy and at worst they can limit the range of DSL
expressions that can be built. We present a simple
solution to this problem that allows
application-specific constraints to be specified at the
point of use of a DSL expression rather than when the
DSL's embedding types are defined. Our solution applies
equally to both tagged and tagless representations and,
importantly, also works in the presence of higher-rank
types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Mainland:2012:EHM,
author = "Geoffrey Mainland",
title = "Explicitly heterogeneous metaprogramming with
{MetaHaskell}",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "311--322",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364572",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Languages with support for metaprogramming, like
MetaOCaml, offer a principled approach to code
generation by guaranteeing that well-typed metaprograms
produce well-typed programs. However, many problem
domains where metaprogramming can fruitfully be applied
require generating code in languages like C, CUDA, or
assembly. Rather than resorting to ad-hoc code
generation techniques, these applications should be
directly supported by explicitly heterogeneous
metaprogramming languages. We present MetaHaskell, an
extension of Haskell 98 that provides modular syntactic
and type system support for type safe metaprogramming
with multiple object languages. Adding a new object
language to MetaHaskell requires only minor
modifications to the host language to support
type-level quantification over object language types
and propagation of type equality constraints. We
demonstrate the flexibility of our approach through
three object languages: a core ML language, a linear
variant of the core ML language, and a subset of C. All
three languages support metaprogramming with open terms
and guarantee that well-typed MetaHaskell programs will
only produce closed object terms that are well-typed.
The essence of MetaHaskell is captured in a type system
for a simplified metalanguage. MetaHaskell, as well as
all three object languages, are fully implemented in
the mhc bytecode compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Axelsson:2012:GAS,
author = "Emil Axelsson",
title = "A generic abstract syntax model for embedded
languages",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "323--334",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364573",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Representing a syntax tree using a data type often
involves having many similar-looking constructors.
Functions operating on such types often end up having
many similar-looking cases. Different languages often
make use of similar-looking constructions. We propose a
generic model of abstract syntax trees capable of
representing a wide range of typed languages. Syntactic
constructs can be composed in a modular fashion
enabling reuse of abstract syntax and syntactic
processing within and across languages. Building on
previous methods of encoding extensible data types in
Haskell, our model is a pragmatic solution to Wadler's
``expression problem''. Its practicality has been
confirmed by its use in the implementation of the
embedded language Feldspar.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
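
The Axelsson entry above builds on known encodings of extensible data types
in Haskell. The sketch below shows the coproduct encoding such models start
from (data types a la carte); it is background for the entry, not the
paper's actual library, and the fragment names are illustrative.

    {-# LANGUAGE TypeOperators #-}
    newtype Fix f = In (f (Fix f))

    -- Compose independent syntactic fragments into one syntax tree.
    data (f :+: g) e = Inl (f e) | Inr (g e)

    data Lit e = Lit Int
    data Add e = Add e e

    type Expr = Fix (Lit :+: Add)

    eval :: Expr -> Int
    eval (In (Inl (Lit n)))   = n
    eval (In (Inr (Add x y))) = eval x + eval y

    -- 1 + 2, assembled from the two fragments
    example :: Expr
    example = In (Inr (Add (In (Inl (Lit 1))) (In (Inl (Lit 2)))))
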
@Article{Pike:2012:ERD,
author = "Lee Pike and Nis Wegmann and Sebastian Niller and
Alwyn Goodloe",
title = "Experience report: a do-it-yourself high-assurance
compiler",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "335--340",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364553",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Embedded domain-specific languages (EDSLs) are an
approach for quickly building new languages while
maintaining the advantages of a rich metalanguage. We
argue in this experience report that the ``EDSL
approach'' can surprisingly ease the task of building a
high-assurance compiler. We do not strive to build a
fully formally-verified tool-chain, but take a
``do-it-yourself'' approach to increase our confidence
in compiler-correctness without too much effort.
Copilot is an EDSL developed by Galois, Inc. and the
National Institute of Aerospace under contract to NASA
for the purpose of runtime monitoring of
flight-critical avionics. We report our experience in
using type-checking, QuickCheck, and model-checking
``off-the-shelf'' to quickly increase confidence in our
EDSL tool-chain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
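
The "off-the-shelf" QuickCheck step in the Pike et al. entry above has this
general shape: test a compiler pass against a reference interpreter on
random programs. The expression language and the constant-folding pass below
are invented stand-ins, not Copilot's.

    import Test.QuickCheck

    data Expr = Lit Int | Add Expr Expr deriving Show

    instance Arbitrary Expr where
      arbitrary = sized go
        where go 0 = Lit <$> arbitrary
              go n = oneof [ Lit <$> arbitrary
                           , Add <$> go (n `div` 2) <*> go (n `div` 2) ]

    eval :: Expr -> Int
    eval (Lit n)   = n
    eval (Add x y) = eval x + eval y

    -- A toy stand-in for a compiler pass: constant folding.
    opt :: Expr -> Expr
    opt (Add x y) = case (opt x, opt y) of
                      (Lit a, Lit b) -> Lit (a + b)
                      (x', y')       -> Add x' y'
    opt e = e

    -- Semantics preservation, checked on random expressions.
    prop_optCorrect :: Expr -> Bool
    prop_optCorrect e = eval (opt e) == eval e

    main :: IO ()
    main = quickCheck prop_optCorrect
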
@Article{Vytiniotis:2012:EPD,
author = "Dimitrios Vytiniotis and Simon Peyton Jones and
Jos{\'e} Pedro Magalh{\~a}es",
title = "Equality proofs and deferred type errors: a compiler
pearl",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "341--352",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364554",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Glasgow Haskell Compiler is an optimizing compiler
that expresses and manipulates first-class equality
proofs in its intermediate language. We describe a
simple, elegant technique that exploits these equality
proofs to support deferred type errors. The technique
requires us to treat equality proofs as
possibly-divergent terms; we show how to do so without
losing either soundness or the zero-overhead cost model
that the programmer expects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
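
The Vytiniotis, Peyton Jones, and Magalhaes technique above shipped in GHC
as the -fdefer-type-errors flag, so its effect is easy to demonstrate: an
ill-typed fragment compiles to code that raises the error only if it is ever
evaluated.

    {-# OPTIONS_GHC -fdefer-type-errors #-}
    main :: IO ()
    main = do
      putStrLn "this line runs fine"
      print (fst 'x')  -- ill-typed; a runtime error only when forced
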
@Article{Neatherway:2012:TBA,
author = "Robin P. Neatherway and Steven J. Ramsay and Chih-Hao
Luke Ong",
title = "A traversal-based algorithm for higher-order model
checking",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "353--364",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364578",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Higher-order model checking --- the model checking of
trees generated by higher-order recursion schemes
(HORS) --- is a natural generalisation of finite-state
and pushdown model checking. Recent work has shown that
it can serve as a basis for software model checking for
functional languages such as ML and Haskell. In this
paper, we introduce higher-order recursion schemes with
cases (HORSC), which extend HORS with a
definition-by-cases construct (to express program
branching based on data) and non-determinism (to
express abstractions of behaviours). This paper is a
study of the universal HORSC model checking problem for
deterministic trivial automata: does the automaton
accept every tree in the tree language generated by the
given HORSC? We first characterise the model checking
problem by an intersection type system extended with a
carefully restricted form of union types. We then
present an algorithm for deciding the model checking
problem, which is based on the notion of traversals
induced by the fully abstract game semantics of these
schemes, but presented as a goal-directed construction
of derivations in the intersection and union type
system. We view HORSC model checking as a suitable
backend engine for an approach to verifying functional
programs. We have implemented the algorithm in a tool
called TravMC, and demonstrated its effectiveness on a
test suite of programs, including abstract models of
functional programs obtained via an
abstraction-refinement procedure from pattern-matching
recursion schemes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Perera:2012:FPE,
author = "Roly Perera and Umut A. Acar and James Cheney and Paul
Blain Levy",
title = "Functional programs that explain their work",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "365--376",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364579",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present techniques that enable higher-order
functional computations to ``explain'' their work by
answering questions about how parts of their output
were calculated. As explanations, we consider the
traditional notion of program slices, which we show can
be inadequate, and propose a new notion: trace slices.
We present techniques for specifying flexible and rich
slicing criteria based on partial expressions, parts of
which have been replaced by holes. We characterise
program slices in an algorithm-independent fashion and
show that a least slice for a given criterion exists.
We then present an algorithm, called unevaluation, for
computing least program slices from computations
reified as traces. Observing a limitation of program
slices, we develop a notion of trace slice as another
form of explanation and present an algorithm for
computing them. The unevaluation algorithm can be
applied to any subtrace of a trace slice to compute a
program slice whose evaluation generates that subtrace.
This close correspondence between programs, traces, and
their slices can enable the programmer to understand a
computation interactively, in terms of the programming
language in which the computation is expressed. We
present an implementation in the form of a tool,
discuss some important practical implementation
concerns and present some techniques for addressing
them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '12 conference proceedings.",
}
@Article{Suenaga:2012:TBS,
author = "Kohei Suenaga and Ryota Fukuda and Atsushi Igarashi",
title = "Type-based safe resource deallocation for
shared-memory concurrency",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "1--20",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384618",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a type system to guarantee safe resource
deallocation for shared-memory concurrent programs by
extending the previous type system based on fractional
ownerships. Here, safe resource deallocation means that
memory cells, locks, or threads are not left allocated
when a program terminates. Our framework supports (1)
fork/join parallelism, (2) synchronization with locks,
and (3) dynamically allocated memory cells and locks.
The type system is proved to be sound. We also provide
a type inference algorithm for the type system and a
prototype implementation of the algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Gordon:2012:URI,
author = "Colin S. Gordon and Matthew J. Parkinson and Jared
Parsons and Aleks Bromfield and Joe Duffy",
title = "Uniqueness and reference immutability for safe
parallelism",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "21--40",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384619",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A key challenge for concurrent programming is that
side-effects (memory operations) in one thread can
affect the behavior of another thread. In this paper,
we present a type system to restrict the updates to
memory to prevent these unintended side-effects. We
provide a novel combination of immutable and unique
(isolated) types that ensures safe parallelism (race
freedom and deterministic execution). The type system
includes support for polymorphism over type qualifiers,
and can easily create cycles of immutable objects. Key
to the system's flexibility is the ability to recover
immutable or externally unique references after
violating uniqueness without any explicit alias
tracking. Our type system models a prototype extension
to C\# that is in active use by a Microsoft team. We
describe their experiences building large systems with
this extension. We prove the soundness of the type
system by an embedding into a program logic.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
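
The paper's qualifiers are checked statically in an extended C#; a
dynamically checked Java sketch of the ``isolated'' (externally unique) idea
still conveys the enforced invariant, namely that at most one usable alias
to the object exists at a time. The Unique wrapper and its
consume-on-transfer rule are hypothetical:

    // Dynamically checked stand-in for a statically enforced uniqueness qualifier.
    public class UniqueDemo {
        static final class Unique<T> {
            private T ref;
            Unique(T ref) { this.ref = ref; }
            // Consuming transfer: after take(), this handle is dead, so the
            // object never has two live, usable aliases.
            T take() {
                if (ref == null) throw new IllegalStateException("already consumed");
                T r = ref; ref = null; return r;
            }
        }

        public static void main(String[] args) {
            Unique<StringBuilder> u = new Unique<>(new StringBuilder("data"));
            StringBuilder sb = u.take();   // ownership moves to sb
            sb.append("!");                // safe: no other alias can observe the write
            try { u.take(); } catch (IllegalStateException e) {
                System.out.println("second take rejected: " + e.getMessage());
            }
        }
    }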
@Article{Sreeram:2012:SCD,
author = "Jaswanth Sreeram and Santosh Pande",
title = "Safe compiler-driven transaction checkpointing and
recovery",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "41--56",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384620",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Several studies have shown that a large fraction of
the work performed inside memory transactions in
representative programs is wasted due to the
transaction experiencing a conflict and aborting.
Aborts inside long-running transactions are especially
harmful to performance, and the simplicity of the TM
programming model (relative to fine-grained locking)
for synchronizing large critical sections means that
large transactions are common, which exacerbates the
problem of wasted work. In this paper we present a
practical transaction checkpoint and recovery scheme in
which transactions that experience a conflict can
restore their state (including the local context in
which they were executing) to some dynamic program
point before this access and begin execution from that
point. This state saving and restoration is implemented
by checkpoint operations that are generated by a
compiler into the transaction's body and are also
optimized to reduce the amount of state that is saved
and restored. We also describe a runtime system that
manages these checkpointed states and orchestrates the
restoration of the right checkpointed state for a
conflict on a particular transactional access. Moreover
the synthesis of these save and restore operations,
their optimization and invocation at runtime are
completely transparent to the programmer. We have
implemented the checkpoint generation and optimization
scheme in the LLVM compiler and runtime support for the
TL2 STM system. Our experiments indicate that for many
parallel programs, using such checkpoint recovery
schemes can result in up to several orders of magnitude
reduction in the number of aborts and significant
execution time speedups relative to plain transactional
programs for the same number of threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
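
A minimal Java sketch of the partial-rollback idea (the paper's checkpoints
are compiler-generated inside an STM; this hand-written optimistic loop only
imitates the control flow): live locals are saved before the conflict-prone
access, and a conflict restarts from the checkpoint rather than from the
start of the transaction.

    import java.util.concurrent.atomic.AtomicInteger;

    public class TxCheckpoint {
        static final AtomicInteger shared = new AtomicInteger();

        public static void main(String[] args) {
            long acc = 0;
            for (int i = 0; i < 5; i++) acc += i;   // expensive prefix work

            final long checkpointAcc = acc;         // "compiler-inserted" save of live locals
            long result;
            while (true) {
                long local = checkpointAcc;         // restore live locals on each retry
                int seen = shared.get();            // the conflict-prone read
                local += seen;
                if (shared.compareAndSet(seen, seen + 1)) {  // validate and commit
                    result = local;
                    break;
                }
                // on conflict: loop back to the checkpoint, not to the prefix
            }
            System.out.println("result = " + result);
        }
    }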
@Article{Muller:2012:TPS,
author = "Stefan Muller and Stephen Chong",
title = "Towards a practical secure concurrent language",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "57--74",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384621",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We demonstrate that a practical concurrent language
can be extended in a natural way with information
security mechanisms that provably enforce strong
information security guarantees. We extend the X10
concurrent programming language with coarse-grained
information-flow control. Central to X10 concurrency
abstractions is the notion of a place: a container for
data and computation. We associate a security level
with each place, and restrict each place to store only
data appropriate for that security level. When places
interact only with other places at the same security
level, then our security mechanisms impose no
restrictions. When places of differing security levels
interact, our information security analysis prevents
potentially dangerous information flows, including
information flow through covert scheduling channels.
The X10 concurrency mechanisms simplify reasoning about
information flow in concurrent programs. We present a
static analysis that enforces a noninterference-based
extensional information security condition in a
calculus that captures the key aspects of X10's place
abstraction and async-finish parallelism. We extend
this security analysis to support many of X10's
language features, and have implemented a prototype
compiler for the resulting language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
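
A toy Java rendering of the place-per-security-level idea (X10's
async/finish constructs and the paper's static analysis are elided; the
two-point lattice and the copyTo check are hypothetical): cross-place
transfer is the only interaction here, and each transfer is checked against
the lattice.

    // Places tagged with security levels; data may flow only upward.
    public class SecurePlaces {
        enum Level { LOW, HIGH }   // a two-point security lattice

        static final class Place {
            final Level level;
            Place(Level level) { this.level = level; }
            void copyTo(Place dst, String data) {
                if (dst.level.ordinal() < this.level.ordinal())
                    throw new SecurityException("illegal flow " + level + " -> " + dst.level);
                System.out.println("sent to " + dst.level + ": " + data);
            }
        }

        public static void main(String[] args) {
            Place low = new Place(Level.LOW), high = new Place(Level.HIGH);
            low.copyTo(high, "public datum");       // LOW -> HIGH: allowed
            try { high.copyTo(low, "secret"); }     // HIGH -> LOW: rejected
            catch (SecurityException e) { System.out.println(e.getMessage()); }
        }
    }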
@Article{Parizek:2012:PAJ,
author = "Pavel Par{\'\i}zek and OndYej Lhot{\'a}k",
title = "Predicate abstraction of {Java} programs with
collections",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "75--94",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384623",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Our goal is to develop precise and scalable
verification techniques for Java programs that use
collections and properties that depend on their
content. We apply the popular approach of predicate
abstraction to Java programs and collections. The main
challenge in this context is precise and compact
modeling of collections that enables practical
verification. We define a predicate language for
modeling the observable state of Java collections at
the interface level. Changes of the state by API
methods are captured by weakest preconditions. We adapt
existing techniques for construction of abstract
programs. Most notably, we designed optimizations based
on specific features of the predicate language. We
evaluated our approach on Java programs that use
collections in advanced ways. Our results show that
interesting properties, such as consistency between
multiple collections, can be verified using our
approach. The properties are specified using logic
formulas that involve predicates introduced by our
language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
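
A toy Java illustration of interface-level predicate abstraction for
collections (the paper's predicate language and weakest-precondition
transformers are far richer; the two predicates tracked here are made up):
the abstract state is a few booleans about the collection's observable
behaviour, updated at each API call instead of modeling heap contents. Run
with java -ea to check the assertions.

    import java.util.ArrayList;
    import java.util.List;

    public class CollectionAbstraction {
        boolean isEmpty = true;        // predicate: size() == 0
        boolean contains42 = false;    // predicate: contains(42)
        final List<Integer> concrete = new ArrayList<>();  // kept only for comparison

        void add(int x) {              // abstract transformer for List.add
            concrete.add(x);
            isEmpty = false;
            contains42 = contains42 || x == 42;
        }

        void clear() {                 // abstract transformer for List.clear
            concrete.clear();
            isEmpty = true;
            contains42 = false;
        }

        public static void main(String[] args) {
            CollectionAbstraction a = new CollectionAbstraction();
            a.add(42);
            assert !a.isEmpty && a.contains42;
            a.clear();
            assert a.isEmpty == a.concrete.isEmpty();  // abstraction agrees with the heap
            System.out.println("predicates tracked the concrete state");
        }
    }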
@Article{Schiller:2012:RBW,
author = "Todd W. Schiller and Michael D. Ernst",
title = "Reducing the barriers to writing verified
specifications",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "95--112",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384624",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Formally verifying a program requires significant
skill not only because of complex interactions between
program subcomponents, but also because of deficiencies
in current verification interfaces. These skill
barriers make verification economically unattractive by
preventing the use of less-skilled (less-expensive)
workers and distributed workflows (i.e.,
crowdsourcing). This paper presents VeriWeb, a
web-based IDE for verification that decomposes the task
of writing verifiable specifications into manageable
subproblems. To overcome the information loss caused by
task decomposition, and to reduce the skill required to
verify a program, VeriWeb incorporates several
innovative user interface features: drag and drop
condition construction, concrete counterexamples, and
specification inlining. To evaluate VeriWeb, we
performed three experiments. First, we show that
VeriWeb lowers the time and monetary cost of
verification by performing a comparative study of
VeriWeb and a traditional tool using 14 paid subjects
contracted hourly from Exhedra Solution's vWorker
online marketplace. Second, we demonstrate the dearth
and insufficiency of current ad-hoc labor marketplaces
for verification by recruiting workers from Amazon's
Mechanical Turk to perform verification with VeriWeb.
Finally, we characterize the minimal communication
overhead incurred when VeriWeb is used collaboratively
by observing two pairs of developers each use the tool
simultaneously to verify a single program.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Betts:2012:GVG,
author = "Adam Betts and Nathan Chong and Alastair Donaldson and
Shaz Qadeer and Paul Thomson",
title = "{GPUVerify}: a verifier for {GPU} kernels",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "113--132",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384625",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a technique for verifying race- and
divergence-freedom of GPU kernels that are written in
mainstream kernel programming languages such as OpenCL
and CUDA. Our approach is founded on a novel formal
operational semantics for GPU programming termed
synchronous, delayed visibility (SDV) semantics. The
SDV semantics provides a precise definition of barrier
divergence in GPU kernels and allows kernel
verification to be reduced to analysis of a sequential
program, thereby completely avoiding the need to reason
about thread interleavings, and allowing existing
modular techniques for program verification to be
leveraged. We describe an efficient encoding for data
race detection and propose a method for automatically
inferring loop invariants required for verification. We
have implemented these techniques as a practical
verification tool, GPUVerify, which can be applied
directly to OpenCL and CUDA source code. We evaluate
GPUVerify with respect to a set of 163 kernels drawn
from public and commercial sources. Our evaluation
demonstrates that GPUVerify is capable of efficient,
automatic verification of a large number of real-world
kernels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Logozzo:2012:MVA,
author = "Francesco Logozzo and Thomas Ball",
title = "Modular and verified automatic program repair",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "133--146",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384626",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study the problem of suggesting code repairs at
design time, based on the warnings issued by modular
program verifiers. We introduce the concept of a
verified repair, a change to a program's source that
removes bad execution traces while increasing the
number of good traces, where the bad/good traces form a
partition of all the traces of a program. Repairs are
property-specific. We demonstrate our framework in the
context of warnings produced by the modular cccheck
(a.k.a. Clousot) abstract interpreter, and generate
repairs for missing contracts, incorrect locals and
objects initialization, wrong conditionals, buffer
overruns, arithmetic overflow and incorrect floating
point comparisons. We report our experience with
automatically generating repairs for the {.NET}
framework libraries, generating verified repairs for
over 80\% of the warnings generated by cccheck.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
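
A hand-made before/after pair in the spirit of a verified repair (cccheck's
actual warnings and suggested fixes are more involved): the repaired method
removes the bad traces, here out-of-bounds accesses, by adding the missing
precondition, while leaving every good trace unchanged.

    public class RepairDemo {
        // Original: a modular verifier warns that i may fall outside a.
        static int getBroken(int[] a, int i) {
            return a[i];               // bad trace when i < 0 or i >= a.length
        }

        // Verified repair: a missing-contract repair guards the access;
        // all good traces of the original behave exactly as before.
        static int getRepaired(int[] a, int i) {
            if (i < 0 || i >= a.length)
                throw new IllegalArgumentException("requires 0 <= i < a.length");
            return a[i];
        }

        public static void main(String[] args) {
            int[] a = {1, 2, 3};
            System.out.println(getRepaired(a, 2));   // a good trace, unchanged
        }
    }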
@Article{Kulkarni:2012:MCO,
author = "Sameer Kulkarni and John Cavazos",
title = "Mitigating the compiler optimization phase-ordering
problem using machine learning",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "147--162",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384628",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today's compilers have a plethora of optimizations to
choose from, and the correct choice of optimizations
can have a significant impact on the performance of the
code being optimized. Furthermore, choosing the correct
order in which to apply those optimizations has been a
long-standing problem in compilation research. Each of
these optimizations interacts with the code and in turn
with all other optimizations in complicated ways.
Traditional compilers typically apply the same set of
optimizations in a fixed order to all functions in a
program, without regard to the code being optimized.
Understanding the interactions of optimizations is very
important in determining a good solution to the
phase-ordering problem. This paper develops a new
approach that automatically selects good optimization
orderings on a per method basis within a dynamic
compiler. Our approach formulates the phase-ordering
problem as a Markov process and uses a characterization
of the current state of the code being optimized to
create a better solution to the phase-ordering
problem. Our technique uses neuro-evolution to
construct an artificial neural network that is capable
of predicting beneficial optimization ordering for a
piece of code that is being optimized. We implemented
our technique in Jikes RVM and achieved significant
improvements on a set of standard Java benchmarks over
a well-engineered fixed order.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
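
A stub of the per-method policy the paper learns (the real system evolves a
neural network inside Jikes RVM; the two features, three passes, and the
weight matrix below are invented for illustration): the current state of the
code is summarized as a feature vector, and the policy scores which
optimization to apply next.

    public class PhaseOrdering {
        static final String[] PASSES = {"inline", "unroll", "dce"};
        // One weight row per pass over the features {methodSize, loopCount}.
        static final double[][] W = {{-0.02, 0.5}, {-0.01, 0.9}, {0.003, -0.2}};

        static int pickNextPass(double methodSize, double loopCount) {
            int best = 0;
            double bestScore = Double.NEGATIVE_INFINITY;
            for (int p = 0; p < PASSES.length; p++) {
                double score = W[p][0] * methodSize + W[p][1] * loopCount;
                if (score > bestScore) { bestScore = score; best = p; }
            }
            return best;
        }

        public static void main(String[] args) {
            // A small, loop-heavy method: this policy favors unrolling first.
            System.out.println("next pass: " + PASSES[pickNextPass(120, 3)]);
        }
    }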
@Article{St-Amour:2012:OCO,
author = "Vincent St-Amour and Sam Tobin-Hochstadt and Matthias
Felleisen",
title = "Optimization coaching: optimizers learn to communicate
with programmers",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "163--178",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384629",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Optimizing compilers map programs in high-level
languages to high-performance target language code. To
most programmers, such a compiler constitutes an
impenetrable black box whose inner workings are beyond
their understanding. Since programmers often must
understand the workings of their compilers to achieve
their desired performance goals, they typically resort
to various forms of reverse engineering, such as
examining compiled code or intermediate forms. Instead,
optimizing compilers should engage programmers in a
dialog. This paper introduces one such possible form of
dialog: optimization coaching. An optimization coach
watches while a program is compiled, analyzes the
results, generates suggestions for enabling further
compiler optimization in the source program, and
presents a suitable synthesis of its results to the
programmer. We present an evaluation based on case
studies, which illustrate how an optimization coach can
help programmers achieve optimizations resulting in
substantial performance improvements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Inoue:2012:AML,
author = "Hiroshi Inoue and Hiroshige Hayashizaki and Peng Wu
and Toshio Nakatani",
title = "Adaptive multi-level compilation in a trace-based
{Java JIT} compiler",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "179--194",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384630",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes our multi-level compilation
techniques implemented in a trace-based Java JIT
compiler (trace-JIT). Like existing multi-level
compilation for method-based compilers, we start JIT
compilation with a small compilation scope and a low
optimization level so the program can start running
quickly. Then we identify hot paths with a timer-based
sampling profiler, generate long traces that capture
the hot paths, and recompile them with a high
optimization level to improve the peak performance. A
key to high performance is selecting long traces that
effectively capture the entire hot paths for upgrade
recompilations. To do this, we introduce a new
technique to generate a directed graph representing the
control flow, a TTgraph, and use the TTgraph in the
trace selection engine to efficiently select long
traces. We show that our multi-level compilation
improves the peak performance of programs by up to
58.5\% and 22.2\% on average compared to compiling all
of the traces only at a low optimization level.
Comparing the performance with our multi-level
compilation to the performance when compiling all of
the traces at a high optimization level, our technique
can reduce the startup times of programs by up to
61.1\% and 31.3\% on average without significant
reduction in the peak performance. Our results show
that our adaptive multi-level compilation can balance
the peak performance and startup time by taking
advantage of different optimization levels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Castanos:2012:BPE,
author = "Jose Castanos and David Edelsohn and Kazuaki Ishizaki
and Priya Nagpurkar and Toshio Nakatani and Takeshi
Ogasawara and Peng Wu",
title = "On the benefits and pitfalls of extending a statically
typed language {JIT} compiler for dynamic scripting
languages",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "195--212",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384631",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Whenever the need to compile a new dynamically typed
language arises, an appealing option is to repurpose an
existing statically typed language Just-In-Time (JIT)
compiler (repurposed JIT compiler). Existing repurposed
JIT compilers (RJIT compilers), however, have not yet
delivered the hoped-for performance boosts. The
performance of JVM languages, for instance, often lags
behind standard interpreter implementations. Even more
customized solutions that extend the internals of a JIT
compiler for the target language compete poorly with
those designed specifically for dynamically typed
languages. Our own Fiorano JIT compiler is an example
of this problem. As a state-of-the-art RJIT compiler
for Python, the Fiorano JIT compiler outperforms two
other RJIT compilers (Unladen Swallow and Jython), but
still shows a noticeable performance gap compared to
PyPy, today's best-performing Python JIT compiler. In
this paper, we discuss techniques that have proved
effective in the Fiorano JIT compiler as well as
limitations of our current implementation. More
importantly, this work offers the first in-depth look
at benefits and limitations of the repurposed JIT
compiler approach. We believe the most common pitfall
of existing RJIT compilers is not focusing sufficiently
on specialization, an abundant optimization opportunity
unique to dynamically typed languages. Unfortunately,
the lack of specialization cannot be overcome by
applying traditional optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
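
A minimal Java sketch of the specialization the paper singles out (the
guard-plus-fast-path shape is the standard dynamic-language JIT idiom; the
generic add semantics here are invented): a generic operation is specialized
for the operand types actually observed, with a guard that falls back to the
generic path.

    public class Specialization {
        // Generic semantics of a dynamically typed "+".
        static Object genericAdd(Object a, Object b) {
            if (a instanceof Integer x && b instanceof Integer y) return x + y;
            return a.toString() + b.toString();
        }

        // Fast path "compiled" after observing Integer operands.
        static Object specializedAdd(Object a, Object b) {
            if (a instanceof Integer x && b instanceof Integer y)   // the guard
                return x + y;                                       // specialized path
            return genericAdd(a, b);                                // fallback
        }

        public static void main(String[] args) {
            System.out.println(specializedAdd(2, 3));      // fast path: 5
            System.out.println(specializedAdd("a", 1));    // guard fails: "a1"
        }
    }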
@Article{Cousot:2012:AIFb,
author = "Patrick M. Cousot and Radhia Cousot and Francesco
Logozzo and Michael Barnett",
title = "An abstract interpretation framework for refactoring
with application to extract methods with contracts",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "213--232",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384633",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Method extraction is a common refactoring feature
provided by most modern IDEs. It replaces a
user-selected piece of code with a call to an
automatically generated method. We address the problem
of automatically inferring contracts (precondition,
postcondition) for the extracted method. We require the
inferred contract: (a) to be valid for the extracted
method (validity); (b) to guard the language and
programmer assertions in the body of the extracted
method by an opportune precondition (safety); (c) to
preserve the proof of correctness of the original code
when analyzing the new method separately
(completeness); and (d) to be the most general possible
(generality). These requirements rule out trivial
solutions (e.g., inlining, projection, etc.). We propose
two theoretical solutions to the problem. The first one
is simple and optimal. It is valid, safe, complete and
general but unfortunately not effectively computable
(except for unrealistic finiteness/decidability
hypotheses). The second one is based on an iterative
forward/backward method. We show it to be valid, safe,
and, under reasonable assumptions, complete and
general. We prove that the second solution subsumes the
first. All justifications are provided with respect to
a new, set-theoretic version of Hoare logic (hence
without logic), and abstractions of Hoare logic,
revisited to avoid surprisingly unsound inference
rules. We have implemented the new algorithms on the
top of two industrial-strength tools (CCCheck and the
Microsoft Roslyn CTP). Our experience shows that the
analysis is both fast enough to be used in an
interactive environment and precise enough to generate
good annotations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
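
A small Java illustration of extract-method-with-contracts (the paper infers
the contract by abstract interpretation over CodeContracts; the extracted
method and the assertions standing in for contracts below are hand-made):
the inferred precondition guards the body's assertion, and the postcondition
is strong enough to preserve the caller's reasoning. Run with java -ea.

    public class Extracted {
        // The user-selected code, extracted with an inferred contract.
        static long absDiff(int x, int y) {
            assert x >= y : "precondition: x >= y";       // safety: guards the body
            long d = (long) x - y;                        // widened to avoid overflow
            assert d >= 0 : "postcondition: result >= 0"; // completeness for the caller
            return d;
        }

        public static void main(String[] args) {
            int a = 7, b = 3;
            if (a >= b)                   // caller establishes the precondition
                System.out.println(absDiff(a, b));
        }
    }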
@Article{Zhang:2012:RAJ,
author = "Ying Zhang and Gang Huang and Xuanzhe Liu and Wei
Zhang and Hong Mei and Shunxiang Yang",
title = "Refactoring {Android Java} code for on-demand
computation offloading",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "233--248",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384634",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computation offloading is a promising way to improve
the performance as well as reduce the battery power
consumption of a smartphone application by executing
some parts of the application on a remote server.
Supporting such capability is not easy for smartphone
application developers due to (1) correctness: some
code, e.g., that for GPS, gravity, and other sensors,
can run only on the smartphone so that developers have
to identify which parts of the application cannot be
offloaded; (2) effectiveness: the reduced execution
time must be greater than the network delay caused by
computation offloading so that developers need to
calculate which parts are worth offloading; (3)
adaptability: smartphone applications often face
changes of user requirements and runtime environments
so that developers need to implement the adaptation on
offloading. More importantly, considering the large
number of today's smartphone applications, solutions
applicable for legacy applications will be much more
valuable. In this paper, we present a tool, named
DPartner, that automatically refactors Android
applications to be the ones with computation offloading
capability. For a given Android application, DPartner
first analyzes its bytecode for discovering the parts
worth offloading, then rewrites the bytecode to
implement a special program structure supporting
on-demand offloading, and finally generates two
artifacts to be deployed onto an Android phone and the
server, respectively. We evaluated DPartner on three
real-world Android applications, demonstrating the
reduction of execution time by 46\%--97\% and battery
power consumption by 27\%--83\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
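
The entry's correctness and effectiveness criteria fit in a few lines. A
hypothetical Java sketch (DPartner derives these facts from bytecode
analysis and profiling; the method and parameters here are stand-ins):
sensor-bound code is pinned to the phone, and the rest is offloaded only
when remote execution plus network delay beats local execution.

    public class OffloadDecision {
        static boolean shouldOffload(double localMs, double remoteMs,
                                     double networkMs, boolean usesLocalSensors) {
            if (usesLocalSensors) return false;        // correctness: GPS etc. must stay
            return remoteMs + networkMs < localMs;     // effectiveness
        }

        public static void main(String[] args) {
            // 400 ms locally, 40 ms on the server, 80 ms round trip: offload.
            System.out.println(shouldOffload(400, 40, 80, false));   // true
            // Sensor-bound code never leaves the phone.
            System.out.println(shouldOffload(400, 40, 80, true));    // false
        }
    }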
@Article{Hayden:2012:KEG,
author = "Christopher M. Hayden and Edward K. Smith and Michail
Denchev and Michael Hicks and Jeffrey S. Foster",
title = "{Kitsune}: efficient, general-purpose dynamic software
updating for {C}",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "249--264",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384635",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic software updating (DSU) systems allow programs
to be updated while running, thereby permitting
developers to add features and fix bugs without
downtime. This paper introduces Kitsune, a new DSU
system for C whose design has three notable features.
First, Kitsune's updating mechanism updates the whole
program, not individual functions. This mechanism is
more flexible than most prior approaches and places no
restrictions on data representations or allowed
compiler optimizations. Second, Kitsune makes the
important aspects of updating explicit in the program
text, making the program's semantics easy to understand
while minimizing programmer effort. Finally, the
programmer can write simple specifications to direct
Kitsune to generate code that traverses and transforms
old-version state for use by new code; such state
transformation is often necessary, and is significantly
more difficult in prior DSU systems. We have used
Kitsune to update five popular, open-source, single-
and multi-threaded programs, and find that few program
changes are required to use Kitsune, and that it incurs
essentially no performance overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Magill:2012:AOT,
author = "Stephen Magill and Michael Hicks and Suriya
Subramanian and Kathryn S. McKinley",
title = "Automating object transformations for dynamic software
updating",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "265--280",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384636",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic software updating (DSU) systems eliminate
costly downtime by dynamically fixing bugs and adding
features to executing programs. Given a static code
patch, most DSU systems construct runtime code changes
automatically. However, a dynamic update must also
specify how to change the running program's execution
state, e.g., the stack and heap, to make it compatible
with the new code. Constructing such state
transformations correctly and automatically remains an
open problem. This paper presents a solution called
Targeted Object Synthesis (TOS). TOS first executes the
same tests on the old and new program versions
separately, observing the program heap state at a few
corresponding points. Given two corresponding heap
states, TOS matches objects in the two versions using
key fields that uniquely identify objects and correlate
old and new-version objects. Given example object
pairs, TOS then synthesizes the simplest-possible
function that transforms an old-version object to its
new-version counterpart. We show that TOS is effective
on updates to four open-source server programs for
which it generates non-trivial transformation functions
that use conditionals, operate on collections, and fix
memory leaks. These transformations help programmers
understand their changes and apply dynamic software
updates.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
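
A hand-made Java example of the kind of transformation function TOS
synthesizes (the version pair and the key field are hypothetical):
corresponding objects are matched across versions by a key field, and the
synthesized function maps an old-version object to its new-version
counterpart, using a conditional much like the paper's non-trivial outputs.

    public class StateTransform {
        static final class UserV1 {
            final int id; final String name;
            UserV1(int id, String name) { this.id = id; this.name = name; }
        }
        static final class UserV2 {
            final int id; final String first, last;
            UserV2(int id, String first, String last) {
                this.id = id; this.first = first; this.last = last;
            }
        }

        // Synthesized from example pairs: old-version object in, new-version out.
        static UserV2 transform(UserV1 u) {
            int space = u.name.indexOf(' ');
            return space < 0
                ? new UserV2(u.id, u.name, "")
                : new UserV2(u.id, u.name.substring(0, space),
                             u.name.substring(space + 1));
        }

        public static void main(String[] args) {
            UserV2 v2 = transform(new UserV1(7, "Ada Lovelace"));
            System.out.println(v2.id + ": " + v2.first + " / " + v2.last);
        }
    }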
@Article{Sartor:2012:EMT,
author = "Jennfer B. Sartor and Lieven Eeckhout",
title = "Exploring multi-threaded {Java} application
performance on multicore hardware",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "281--296",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384638",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "While there have been many studies of how to schedule
applications to take advantage of increasing numbers of
cores in modern-day multicore processors, few have
focused on multi-threaded managed language applications
which are prevalent from the embedded to the server
domain. Managed languages complicate performance
studies because they have additional virtual machine
threads that collect garbage and dynamically compile,
closely interacting with application threads. Further
complexity is introduced as modern multicore machines
have multiple sockets and dynamic frequency scaling
options, broadening opportunities to reduce both power
and running time. In this paper, we explore the
performance of Java applications, studying how best to
map application and virtual machine (JVM) threads to a
multicore, multi-socket environment. We explore both
the cost of separating JVM threads from application
threads, and the opportunity to speed up or slow down
the clock frequency of isolated threads. We perform
experiments with the multi-threaded DaCapo benchmarks
and pseudojbb2005 running on the Jikes Research Virtual
Machine, on a dual-socket, 8-core Intel Nehalem machine
to reveal several novel, and sometimes
counter-intuitive, findings. We believe these insights
are a first but important step towards understanding
and optimizing managed language performance on modern
hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Kumar:2012:WSB,
author = "Vivek Kumar and Daniel Frampton and Stephen M.
Blackburn and David Grove and Olivier Tardieu",
title = "Work-stealing without the baggage",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "297--314",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384639",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Work-stealing is a promising approach for effectively
exploiting software parallelism on parallel hardware. A
programmer who uses work-stealing explicitly identifies
potential parallelism and the runtime then schedules
work, keeping otherwise idle hardware busy while
relieving overloaded hardware of its burden. Prior work
has demonstrated that work-stealing is very effective
in practice. However, work-stealing comes with a
substantial overhead: as much as 2x to 12x slowdown
over orthodox sequential code. In this paper we
identify the key sources of overhead in work-stealing
schedulers and present two significant refinements to
their implementation. We evaluate our work-stealing
designs using a range of benchmarks, four different
work-stealing implementations, including the popular
fork-join framework, and a range of architectures. On
these benchmarks, compared to orthodox sequential Java,
our fastest design has an overhead of just 15\%. By
contrast, fork-join has a 2.3x overhead and the
previous implementation of the system we use has an
overhead of 4.1x. These results and our insight into
the sources of overhead for work-stealing
implementations give further hope to an already
promising technique for exploiting increasingly
available hardware parallelism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
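
For reference, the fork-join style the paper measures against, in standard
Java (plain java.util.concurrent, not the paper's optimized runtime): the
usual way to limit work-stealing overhead is a sequential cutoff, so that
task-creation baggage is paid only on subproblems large enough to be worth
stealing.

    import java.util.concurrent.ForkJoinPool;
    import java.util.concurrent.RecursiveTask;

    public class WorkStealingDemo {
        static final class Fib extends RecursiveTask<Long> {
            final int n;
            Fib(int n) { this.n = n; }
            static long seq(int n) { return n < 2 ? n : seq(n - 1) + seq(n - 2); }
            protected Long compute() {
                if (n < 20) return seq(n);      // sequential cutoff: no task baggage
                Fib left = new Fib(n - 1);
                left.fork();                    // stealable by idle worker threads
                long right = new Fib(n - 2).compute();
                return left.join() + right;
            }
        }

        public static void main(String[] args) {
            System.out.println(ForkJoinPool.commonPool().invoke(new Fib(35)));
        }
    }

The hand-tuned cutoff constant is the standard mitigation for exactly the
overheads the paper quantifies.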
@Article{Bocq:2012:MUM,
author = "S{\'e}bastien Bocq and Koen Daenen",
title = "{Molecule}: using monadic and streaming {I/O} to
compose process networks on the {JVM}",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "315--334",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384640",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Molecule is a domain specific language library
embedded in Scala for easing the creation of scalable
and modular concurrent applications on the JVM.
Concurrent applications are modeled as parallel process
networks that exchange information over mobile and
type-safe messaging interfaces. In this paper, we
present a concurrent programming environment that
combines functional and imperative programming. Using a
monad, we structure the sequential or parallel
coordination of user-level threads, without JVM
modifications or compiler support. Our mobile channel
interfaces expose reusable and parallelizable
higher-order functions, as if they were streams in a
lazily evaluated functional programming language. The
support for graceful termination of entire process
networks is simplified by integrating channel poisoning
with monadic exceptions and resource control. Our
runtime and system-level interfaces leverage message
batching and a novel flow parallel scheduler to limit
expensive context switches in multicore environments.
We illustrate the expressiveness and performance
benefits on a 24-core AMD Opteron machine with three
classical examples: a thread ring, a genuine prime
sieve and a chameneos-redux.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Kalibera:2012:BBA,
author = "Tomas Kalibera and Matthew Mole and Richard Jones and
Jan Vitek",
title = "A black-box approach to understanding concurrency in
{DaCapo}",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "335--354",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384641",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Increasing levels of hardware parallelism are one of
the main challenges for programmers and implementers of
managed runtimes. Any concurrency or scalability
improvements must be evaluated experimentally. However,
application benchmarks available today may not reflect
the highly concurrent applications we anticipate in the
future. They may also behave in ways that VM developers
do not expect. We provide a set of platform independent
concurrency related metrics and an in-depth
observational study of current state of the art
benchmarks, discovering how concurrent they really are,
how they scale the work and how they synchronise and
communicate via shared memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Jo:2012:AEL,
author = "Youngjoon Jo and Milind Kulkarni",
title = "Automatically enhancing locality for tree traversals
with traversal splicing",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "355--374",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384643",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Generally applicable techniques for improving temporal
locality in irregular programs, which operate over
pointer-based data structures such as trees and graphs,
are scarce. Focusing on a subset of irregular programs,
namely, tree traversal algorithms like Barnes--Hut and
nearest neighbor, previous work has proposed point
blocking, a technique analogous to loop tiling in
regular programs, to improve locality. However point
blocking is highly dependent on point sorting, a
technique to reorder points so that consecutive points
will have similar traversals. Performing this a priori
sort requires an understanding of the semantics of the
algorithm and hence highly application-specific
techniques. In this work, we propose traversal
splicing, a new, general, automatic locality
optimization for irregular tree traversal codes, that
is less sensitive to point order, and hence can deliver
substantially better performance, even in the absence
of semantic information. For six benchmark algorithms,
we show that traversal splicing can deliver
single-thread speedups of up to 9.147 (geometric mean:
3.095) over baseline implementations, and up to 4.752
(geometric mean: 2.079) over point-blocked
implementations. Further, we show that in many cases,
automatically applying traversal splicing to a baseline
implementation yields performance that is better than
carefully hand-optimized implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
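
Traversal splicing itself needs runtime reordering machinery, but point
blocking, the baseline it improves on, fits in a few lines. A toy Java
sketch (the interval tree and the visit condition are invented): a whole
block of points walks each node together, so a node stays cache-hot while
every point that needs it visits it.

    import java.util.ArrayList;
    import java.util.List;

    public class PointBlocking {
        static final class Node {
            final int lo, hi;              // interval covered by this subtree
            Node left, right;
            Node(int lo, int hi) { this.lo = lo; this.hi = hi; }
        }

        static Node build(int lo, int hi) {
            Node n = new Node(lo, hi);
            if (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                n.left = build(lo, mid);
                n.right = build(mid, hi);
            }
            return n;
        }

        // One node visit shared by every point still interested in this subtree.
        static void visit(Node n, List<Integer> block, long[] visits) {
            if (n == null || block.isEmpty()) return;
            List<Integer> surviving = new ArrayList<>();
            for (int p : block)
                if (p >= n.lo && p <= n.hi) { visits[0]++; surviving.add(p); }
            visit(n.left, surviving, visits);
            visit(n.right, surviving, visits);
        }

        public static void main(String[] args) {
            Node root = build(0, 1024);
            long[] visits = {0};
            visit(root, List.of(3, 5, 700, 701), visits);   // one blocked traversal
            System.out.println("point-node visits in the block: " + visits[0]);
        }
    }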
@Article{Prountzos:2012:ESS,
author = "Dimitrios Prountzos and Roman Manevich and Keshav
Pingali",
title = "{Elixir}: a system for synthesizing concurrent graph
programs",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "375--394",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384644",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Algorithms in new application areas like machine
learning and network analysis use ``irregular'' data
structures such as graphs, trees and sets. Writing
efficient parallel code in these problem domains is
very challenging because it requires the programmer to
make many choices: a given problem can usually be
solved by several algorithms, each algorithm may have
many implementations, and the best choice of algorithm
and implementation can depend not only on the
characteristics of the parallel platform but also on
properties of the input data such as the structure of
the graph. One solution is to permit the application
programmer to experiment with different algorithms and
implementations without writing every variant from
scratch. Auto-tuning to find the best variant is a more
ambitious solution. These solutions require a system
for automatically producing efficient parallel
implementations from high-level specifications. Elixir,
the system described in this paper, is the first step
towards this ambitious goal. Application programmers
write specifications that consist of an operator, which
describes the computations to be performed, and a
schedule for performing these computations. Elixir uses
sophisticated inference techniques to produce efficient
parallel code from such specifications. We used Elixir
to automatically generate many parallel implementations
for three irregular problems: breadth-first search,
single source shortest path, and betweenness-centrality
computation. Our experiments show that the best
generated variants can be competitive with handwritten
code for these problems from other research groups; for
some inputs, they even outperform the handwritten
versions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Liu:2012:CED,
author = "Yanhong A. Liu and Scott D. Stoller and Bo Lin and
Michael Gorbovitski",
title = "From clarity to efficiency for distributed
algorithms",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "395--410",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384645",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes a very high-level language for
clear description of distributed algorithms and
optimizations necessary for generating efficient
implementations. The language supports high-level
control flows where complex synchronization conditions
can be expressed using high-level queries, especially
logic quantifications, over message history sequences.
Unfortunately, the programs would be extremely
inefficient, including consuming unbounded memory, if
executed straightforwardly. We present new
optimizations that automatically transform complex
synchronization conditions into incremental updates of
necessary auxiliary values as messages are sent and
received. The core of the optimizations is the first
general method for efficient implementation of logic
quantifications. We have developed an operational
semantics of the language, implemented a prototype of
the compiler and the optimizations, and successfully
used the language and implementation on a variety of
important distributed algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
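
A minimal Java sketch of the core optimization (the paper's transformation
is automatic over a very high-level language; the mutual-exclusion-style
condition below is a hand-picked instance): a synchronization condition that
quantifies over the message history is replaced by an incrementally
maintained auxiliary value, so no history is stored and each message costs
O(1).

    import java.util.HashSet;
    import java.util.Set;

    public class IncrementalQuant {
        final int nPeers;
        final Set<Integer> replied = new HashSet<>();   // auxiliary value

        IncrementalQuant(int nPeers) { this.nPeers = nPeers; }

        // Incremental update on receive, instead of appending to a history.
        void onReply(int peerId) { replied.add(peerId); }

        // "forall p in peers: a reply from p is in the history" becomes:
        boolean mayEnter() { return replied.size() == nPeers; }

        public static void main(String[] args) {
            IncrementalQuant q = new IncrementalQuant(3);
            q.onReply(1); q.onReply(2);
            System.out.println(q.mayEnter());   // false
            q.onReply(3);
            System.out.println(q.mayEnter());   // true
        }
    }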
@Article{Leino:2012:PEJ,
author = "K. Rustan M. Leino and Aleksandar Milicevic",
title = "Program extrapolation with {Jennisys}",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "411--430",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384646",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The desired behavior of a program can be described
using an abstract model. Compiling such a model into
executable code requires advanced compilation
techniques known as synthesis. This paper presents an
object-based language, called Jennisys, where
programming is done by introducing an abstract model,
defining a concrete data representation for the model,
and then being aided by automatic synthesis to produce
executable code. The paper also presents a synthesis
technique for the language. The technique is built on
an automatic program verifier that, via an underlying
SMT solver, is capable of providing concrete models to
failed verifications. The technique proceeds by
obtaining sample input/output values from concrete
models and then extrapolating programs from the sample
points. The synthesis aims to produce code with
assignments, branching structure, and possibly
recursive calls. It is the first to synthesize code
that creates and uses objects in dynamic data
structures or aggregate objects. A prototype of the
language and synthesis technique has been
implemented.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Kling:2012:BDI,
author = "Michael Kling and Sasa Misailovic and Michael Carbin
and Martin Rinard",
title = "{Bolt}: on-demand infinite loop escape in unmodified
binaries",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "431--450",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384648",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Bolt, a novel system for escaping from
infinite and long-running loops. Directed by a user,
Bolt can attach to a running process and determine if
the program is executing an infinite loop. If so, Bolt
can deploy multiple strategies to escape the loop,
restore the responsiveness of the program, and enable
the program to deliver useful output. Bolt operates on
stripped x86 and x64 binaries, dynamically attaches and
detaches to and from the program as needed, and
dynamically detects loops and creates program state
checkpoints to enable exploration of different escape
strategies. Bolt can detect and escape from loops in
off-the-shelf software, without available source code,
and with no overhead in standard production use.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Huang:2012:LSC,
author = "Jeff Huang and Charles Zhang",
title = "{LEAN}: simplifying concurrency bug reproduction via
replay-supported execution reduction",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "451--466",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384649",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Debugging concurrent programs is known to be difficult
due to scheduling non-determinism. The technique of
multiprocessor deterministic replay substantially
assists debugging by making the program execution
reproducible. However, facing the huge replay traces
and long replay time, the debugging task remains
stunningly challenging for long running executions. We
present a new technique, LEAN, on top of replay, that
significantly reduces the complexity of the replay
trace and the length of the replay time without losing
the determinism in reproducing concurrency bugs. The
cornerstone of our work is a redundancy criterion that
characterizes the redundant computation in a buggy
trace. Based on the redundancy criterion, we have
developed two novel techniques to automatically
identify and remove redundant threads and instructions
in the bug reproduction execution. Our evaluation
results with several real world concurrency bugs in
large complex server programs demonstrate that LEAN is
able to reduce the size, the number of threads, and the
number of thread context switches of the replay trace
by orders of magnitude, and accordingly greatly shorten
the replay time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Effinger-Dean:2012:IIF,
author = "Laura Effinger-Dean and Brandon Lucia and Luis Ceze
and Dan Grossman and Hans-J. Boehm",
title = "{IFRit}: interference-free regions for dynamic
data-race detection",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "467--484",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384650",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a new algorithm for dynamic data-race
detection. Our algorithm reports no false positives and
runs on arbitrary C and C++ code. Unlike previous
algorithms, we do not have to instrument every memory
access or track a full happens-before relation. Our
data-race detector, which we call IFRit, is based on a
run-time abstraction called an interference-free region
(IFR). An IFR is an interval of one thread's execution
during which any write to a specific variable by a
different thread is a data race. We insert
instrumentation at compile time to monitor active IFRs
at run-time. If the runtime observes overlapping IFRs
for conflicting accesses to the same variable in two
different threads, it reports a race. The static
analysis aggregates information for multiple accesses
to the same variable, avoiding the expense of having to
instrument every memory access in the program. We
directly compare IFRit to FastTrack and
ThreadSanitizer, two state-of-the-art fully-precise
data-race detectors. We show that IFRit imposes a
fraction of the overhead of these detectors. We show
that for the PARSEC benchmarks, and several real-world
applications, IFRit finds many of the races detected by
a fully-precise detector. We also demonstrate that
sampling can further reduce IFRit's performance
overhead without completely forfeiting precision.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
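The overlap rule at the heart of IFRit can be illustrated with a minimal,
self-contained sketch: if two threads' interference-free regions for the
same variable overlap in time and at least one of the accesses is a write,
a race is reported. Everything below (the IFR tuple layout, the toy trace)
is illustrative and does not reflect IFRit's implementation.

    from collections import namedtuple

    # An IFR, per the abstract: an interval of one thread's execution tied
    # to one variable; overlapping IFRs on the same variable from different
    # threads, with at least one write, constitute a data race.
    IFR = namedtuple("IFR", "thread var start end is_write")

    def overlaps(a, b):
        return a.start < b.end and b.start < a.end

    def find_races(ifrs):
        return [(a, b)
                for i, a in enumerate(ifrs)
                for b in ifrs[i + 1:]
                if a.thread != b.thread and a.var == b.var
                and (a.is_write or b.is_write) and overlaps(a, b)]

    trace = [IFR("T1", "x", 0, 10, True),   # T1 writes x during [0, 10)
             IFR("T2", "x", 5, 8, False),   # T2 reads x during [5, 8): race
             IFR("T2", "y", 2, 4, True)]    # different variable: no race
    print(find_races(trace))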
@Article{Yu:2012:MCD,
author = "Jie Yu and Satish Narayanasamy and Cristiano Pereira
and Gilles Pokam",
title = "{Maple}: a coverage-driven testing tool for
multithreaded programs",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "485--502",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384651",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Testing multithreaded programs is a hard problem,
because it is challenging to expose those rare
interleavings that can trigger a concurrency bug. We
propose a new thread interleaving coverage-driven
testing tool called Maple that seeks to expose untested
thread interleavings as much as possible. It memoizes
tested interleavings and actively seeks to expose
untested interleavings for a given test input to
increase interleaving coverage. We discuss several
solutions to realize the above goal. First, we discuss
a coverage metric based on a set of interleaving
idioms. Second, we discuss an online technique to
predict untested interleavings that can potentially be
exposed for a given test input. Finally, the predicted
untested interleavings are exposed by actively
controlling the thread schedule while executing for the
test input. We discuss our experiences in using the
tool to expose several known and unknown bugs in
real-world applications such as Apache and MySQL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Dubrau:2012:TM,
author = "Anton Willy Dubrau and Laurie Jane Hendren",
title = "Taming {MATLAB}",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "503--522",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384653",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "MATLAB is a dynamic scientific language used by
scientists, engineers and students worldwide. Although
MATLAB is very suitable for rapid prototyping and
development, MATLAB users often want to convert their
final MATLAB programs to a static language such as
FORTRAN. This paper presents an extensible
object-oriented toolkit for supporting the generation
of static programs from dynamic MATLAB programs. Our
open source toolkit, called the MATLAB Tamer,
identifies a large tame subset of MATLAB, supports the
generation of a specialized Tame IR for that subset,
provides a principled approach to handling the large
number of builtin MATLAB functions, and supports an
extensible interprocedural value analysis for
estimating MATLAB types and call graphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Siddiqui:2012:SSE,
author = "Junaid Haroon Siddiqui and Sarfraz Khurshid",
title = "Scaling symbolic execution using ranged analysis",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "523--536",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384654",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces a novel approach to scale
symbolic execution --- a program analysis technique for
systematic exploration of bounded execution paths---for
test input generation. While the foundations of
symbolic execution were developed over three decades
ago, recent years have seen a real resurgence of the
technique, specifically for systematic bug finding.
However, scaling symbolic execution remains a primary
technical challenge due to the inherent complexity of
the path-based exploration that lies at core of the
technique. Our key insight is that the state of the
analysis can be represented highly compactly: a test
input is all that is needed to effectively encode the
state of a symbolic execution run. We present ranged
symbolic execution, which embodies this insight and
uses two test inputs to define a range, i.e., the
beginning and end, for a symbolic execution run. As an
application of our approach, we show how it enables
scalability by distributing the path exploration---both
in a sequential setting with a single worker node and
in a parallel setting with multiple workers. As an
enabling technology, we leverage the open-source,
state-of-the-art symbolic execution tool KLEE.
Experimental results using 71 programs chosen from the
widely deployed GNU Coreutils set of Unix utilities
show that our approach provides a significant speedup
over KLEE. For example, using 10 worker cores, we
achieve an average speed-up of 6.6X for the 71
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
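The paper's key insight, that under a fixed depth-first exploration order a
single test input pins down one path, so a pair of inputs bounds a
contiguous range of paths, can be sketched in a few lines. The three-branch
toy program and its inputs below are assumptions for illustration and have
nothing to do with KLEE or Coreutils.

    from itertools import product

    def path_of(x):
        # The path taken on input x is its sequence of branch outcomes.
        return (x > 0, x % 2 == 0, x > 100)

    def all_paths():
        # Depth-first exploration visits outcome tuples in lexicographic
        # order, so paths are totally ordered and ranges make sense.
        return sorted(product([False, True], repeat=3))

    def ranged(begin_input, end_input):
        # Two concrete inputs encode the begin and end of one range.
        lo, hi = path_of(begin_input), path_of(end_input)
        return [p for p in all_paths() if lo <= p <= hi]

    # Two workers split the path space using three inputs as two ranges
    # that meet at the boundary path; no other coordination is needed.
    print(ranged(-5, 4))
    print(ranged(4, 200))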
@Article{Tobin-Hochstadt:2012:HOS,
author = "Sam Tobin-Hochstadt and David {Van Horno}",
title = "Higher-order symbolic execution via contracts",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "537--554",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384655",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new approach to automated reasoning about
higher-order programs by extending symbolic execution
to use behavioral contracts as symbolic values, thus
enabling symbolic approximation of higher-order
behavior. Our approach is based on the idea of an
abstract reduction semantics that gives an operational
semantics to programs with both concrete and symbolic
components. Symbolic components are approximated by
their contract and our semantics gives an operational
interpretation of contracts-as-values. The result is an
executable semantics that soundly predicts program
behavior, including contract failures, for all possible
instantiations of symbolic components. We show that our
approach scales to an expressive language of contracts
including arbitrary programs embedded as predicates,
dependent function contracts, and recursive contracts.
Supporting this rich language of specifications leads
to powerful symbolic reasoning using existing program
constructs. We then apply our approach to produce a
verifier for contract correctness of components,
including a sound and computable approximation to our
semantics that facilitates fully automated contract
verification. Our implementation is capable of
verifying contracts expressed in existing programs, and
of justifying contract-elimination optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Rosu:2012:CRU,
author = "Grigore Rosu and Andrei Stefanescu",
title = "Checking reachability using matching logic",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "555--574",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384656",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a verification framework that is
parametric in a (trusted) operational semantics of some
programming language. The underlying proof system is
language-independent and consists of eight proof rules.
The proof system is proved partially correct and
relatively complete (with respect to the programming
language configuration model). To show its
practicality, the generic framework is instantiated
with a fragment of C and evaluated with encouraging
results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Zhao:2012:HCP,
author = "Haiping Zhao and Iain Proctor and Minghui Yang and Xin
Qi and Mark Williams and Qi Gao and Guilherme Ottoni
and Andrew Paroski and Scott MacVicar and Jason Evans
and Stephen Tu",
title = "The {HipHop} compiler for {PHP}",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "575--586",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384658",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scripting languages are widely used to quickly
accomplish a variety of tasks because of the high
productivity they enable. Among other reasons, this
increased productivity results from a combination of
extensive libraries, fast development cycle, dynamic
typing, and polymorphism. The dynamic features of
scripting languages are traditionally associated with
interpreters, which is the approach used to implement
most scripting languages. Although easy to implement,
interpreters are generally slow, which makes scripting
languages prohibitive for implementing large,
CPU-intensive applications. This efficiency problem is
particularly important for PHP given that it is the
most commonly used language for server-side web
development. This paper presents the design,
implementation, and an evaluation of the HipHop
compiler for PHP. HipHop goes against the standard
practice and implements a very dynamic language through
static compilation. After describing the most
challenging PHP features to support through static
compilation, this paper presents HipHop's design and
techniques that support almost all PHP features. We
then present a thorough evaluation of HipHop running
both standard benchmarks and the Facebook web site.
Overall, our experiments demonstrate that HipHop is
about 5.5x faster than standard, interpreted PHP
engines. As a result, HipHop has reduced the number of
servers needed to run Facebook and other web sites by a
factor between 4 and 6, thus drastically cutting
operating costs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Chugh:2012:DTJ,
author = "Ravi Chugh and David Herman and Ranjit Jhala",
title = "Dependent types for {JavaScript}",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "587--606",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384659",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Dependent JavaScript (DJS), a statically
typed dialect of the imperative, object-oriented,
dynamic language. DJS supports the particularly
challenging features such as run-time type-tests,
higher-order functions, extensible objects, prototype
inheritance, and arrays through a combination of nested
refinement types, strong updates to the heap, and heap
unrolling to precisely track prototype hierarchies.
With our implementation of DJS, we demonstrate that the
type system is expressive enough to reason about a
variety of tricky idioms found in small examples drawn
from several sources, including the popular book
JavaScript: The Good Parts and the SunSpider benchmark
suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Meawad:2012:EBS,
author = "Fadi Meawad and Gregor Richards and Flor{\'e}al
Morandat and Jan Vitek",
title = "{Eval} begone!: semi-automated removal of {\tt eval}
from {JavaScript} programs",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "607--620",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384660",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Eval endows JavaScript developers with great power. It
allows developers and end-users, by turning text into
executable code, to seamlessly extend and customize the
behavior of deployed applications as they are running.
With great power comes great responsibility, though not
in our experience. In previous work we demonstrated
through a large corpus study that programmers wield
that power in rather irresponsible and arbitrary ways.
We showed that most calls to eval fall into a small
number of very predictable patterns. We argued that
those patterns could easily be recognized by an
automated algorithm and that they could almost always
be replaced with safer JavaScript idioms. In this paper
we set out to validate our claim by designing and
implementing a tool, which we call Evalorizer, that can
assist programmers in getting rid of their unneeded
evals. We use the tool to remove eval from a real-world
website and validated our approach over logs taken from
the top 100 websites with a success rate over 97\%
under an open world assumption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Kang:2012:FSJ,
author = "Seonghoon Kang and Sukyoung Ryu",
title = "Formal specification of a {JavaScript} module system",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "621--638",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384661",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The JavaScript programming language, originally
developed as a simple scripting language, is now the
language of choice for web applications. All the top
100 sites on the web use JavaScript and its use outside
web pages is rapidly growing. However, JavaScript is
not yet ready for programming in the large: it does not
support a module system. Lack of namespaces introduces
module patterns, and makes it difficult to use multiple
JavaScript frameworks together. In this paper, we
propose a formal specification of a JavaScript module
system. A module system for JavaScript will allow safe
and incremental development of JavaScript web
applications. While the next version of the JavaScript
standard proposes a module system, it informally
describes its design in prose. We formally specify a
module system as an extension to the existing
JavaScript language, and rigorously describe its
semantics via desugaring to LambdaJS, a prior core
calculus for JavaScript. We implement the desugaring
process and show its faithfulness using real-world test
suites. Finally, we define a set of properties for
valid JavaScript programs using modules and formally
prove that the proposed module system satisfies the
validity properties.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Barowy:2012:API,
author = "Daniel W. Barowy and Charlie Curtsinger and Emery D.
Berger and Andrew McGregor",
title = "{AutoMan}: a platform for integrating human-based and
digital computation",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "639--654",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384663",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Humans can perform many tasks with ease that remain
difficult or impossible for computers. Crowdsourcing
platforms like Amazon's Mechanical Turk make it
possible to harness human-based computational power at
an unprecedented scale. However, their utility as a
general-purpose computational platform remains limited.
The lack of complete automation makes it difficult to
orchestrate complex or interrelated tasks. Scheduling
more human workers to reduce latency costs real money,
and jobs must be monitored and rescheduled when workers
fail to complete their tasks. Furthermore, it is often
difficult to predict the length of time and payment
that should be budgeted for a given task. Finally, the
results of human-based computations are not necessarily
reliable, both because human skills and accuracy vary
widely, and because workers have a financial incentive
to minimize their effort. This paper introduces
AutoMan, the first fully automatic crowdprogramming
system. AutoMan integrates human-based computations
into a standard programming language as ordinary
function calls, which can be intermixed freely with
traditional functions. This abstraction lets AutoMan
programmers focus on their programming logic. An
AutoMan program specifies a confidence level for the
overall computation and a budget. The AutoMan runtime
system then transparently manages all details necessary
for scheduling, pricing, and quality control. AutoMan
automatically schedules human tasks for each
computation until it achieves the desired confidence
level; monitors, reprices, and restarts human tasks as
necessary; and maximizes parallelism across human
workers while staying under budget.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Datta:2012:TVW,
author = "Subhajit Datta and Renuka Sindhgatta and Bikram
Sengupta",
title = "Talk versus work: characteristics of developer
collaboration on the {Jazz} platform",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "655--668",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384664",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "IBM's Jazz initiative offers a state-of-the-art
collaborative development environment (CDE)
facilitating developer interactions around
interdependent units of work. In this paper, we analyze
development data across two versions of a major IBM
product developed on the Jazz platform, covering in
total 19 months of development activity, including
17,000+ work items and 61,000+ comments made by more
than 190 developers in 35 locations. By examining the
relation between developer talk and work, we find
evidence that developers maintain a reasonably high
level of connectivity with peer developers with whom
they share work dependencies, but the span of a
developer's communication goes much beyond the known
dependencies of his/her work items. Using multiple
linear regression models, we find that the number of
defects owned by a developer is impacted by the number
of other developers (s)he is connected to through talk,
his/her interpersonal influence in the network of work
dependencies, the number of work items (s)he comments
on, and the number of work items (s)he owns. These effects
are maintained even after controlling for workload,
role, work dependency, and connection related factors.
We discuss the implications of our results for
collaborative software development and project
governance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Muulu:2012:SAI,
author = "Kivan{\c{c}} Mu{\^u}lu and Yuriy Brun and Reid Holmes
and Michael D. Ernst and David Notkin",
title = "Speculative analysis of integrated development
environment recommendations",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "669--682",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384665",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern integrated development environments make
recommendations and automate common tasks, such as
refactorings, auto-completions, and error corrections.
However, these tools present little or no information
about the consequences of the recommended changes. For
example, a rename refactoring may: modify the source
code without changing program semantics; modify the
source code and (incorrectly) change program semantics;
modify the source code and (incorrectly) create
compilation errors; show a name collision warning and
require developer input; or show an error and not
change the source code. Having to compute the
consequences of a recommendation --- either mentally or
by making source code changes --- puts an extra burden
on the developers. This paper aims to reduce this
burden with a technique that informs developers of the
consequences of code transformations. Using Eclipse
Quick Fix as a domain, we describe a plug-in, Quick Fix
Scout, that computes the consequences of Quick Fix
recommendations. In our experiments, developers
completed compilation-error removal tasks 10\% faster
when using Quick Fix Scout than Quick Fix, although the
sample size was not large enough to show statistical
significance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Mayer:2012:ESI,
author = "Clemens Mayer and Stefan Hanenberg and Romain Robbes
and {\'E}ric Tanter and Andreas Stefik",
title = "An empirical study of the influence of static type
systems on the usability of undocumented software",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "683--702",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384666",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Abstract Although the study of static and dynamic type
systems plays a major role in research, relatively
little is known about the impact of type systems on
software development. Perhaps one of the more common
arguments for static type systems in languages such as
Java or C++ is that they require developers to annotate
their code with type names, which is thus claimed to
improve the documentation of software. In contrast, one
common argument against static type systems is that
they decrease flexibility, which may make them harder
to use. While these arguments are found in the
literature, rigorous empirical evidence is lacking. We
report on a controlled experiment where 27 subjects
performed programming tasks on an undocumented API with
a static type system (requiring type annotations) as
well as a dynamic type system (which does not). Our
results show that for some tasks, programmers had
faster completion times using a static type system,
while for others, the opposite held. We conducted an
exploratory study to try to explain why.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Tseng:2012:SDT,
author = "Hung-Wei Tseng and Dean Michael Tullsen",
title = "Software data-triggered threads",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "703--716",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384668",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The data-triggered threads (DTT) programming and
execution model can increase parallelism and eliminate
redundant computation. However, the initial proposal
requires significant architecture support, which
prevents existing applications and architectures from
taking advantage of this model. This work proposes a
pure software solution that supports the DTT model
without any hardware support. This research uses a
prototype compiler and runtime libraries running on top
of existing machines. Several enhancements to the
initial software implementation are presented, which
further improve the performance. The software runtime
system improves the performance of serial C SPEC
benchmarks by 15\% on a Nehalem processor, but by over
7X over the full suite of single-thread applications.
It is shown that the DTT model can work in conjunction
with traditional parallelism. The DTT model provides up
to 64X speedup over parallel applications exploiting
traditional parallelism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
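The execution model's core idea, computation attached to data that runs
only when the data actually changes, admits a tiny single-threaded sketch
(the real model runs triggered computations as threads). The Cell class
and on_change hook are invented names, not the DTT compiler's API.

    class Cell:
        def __init__(self, value):
            self._value = value
            self._triggers = []

        def on_change(self, fn):
            self._triggers.append(fn)

        def write(self, value):
            if value == self._value:   # redundant write: skip recomputation
                return
            self._value = value
            for fn in self._triggers:  # data-triggered computations
                fn(value)

        def read(self):
            return self._value

    total = Cell(0)
    x = Cell(10)
    x.on_change(lambda v: total.write(total.read() + v))

    x.write(10)   # same value: the attached computation never runs
    x.write(32)   # changed value: triggers the update
    print(total.read())   # 42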
@Article{Anderson:2012:ECP,
author = "Zachary Anderson",
title = "Efficiently combining parallel software using
fine-grained, language-level, hierarchical resource
management policies",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "717--736",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384669",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents Poli-C, a language extension,
runtime library, and system daemon enabling
fine-grained, language-level, hierarchical resource
management policies. Poli-C is suitable for use in
applications that compose parallel libraries,
frameworks, and programs. In particular, we have added
a powerful new statement to C for expressing resource
limits and guarantees in such a way that programmers
can set resource management policies even when the
source code of parallel libraries and frameworks is not
available. Poli-C enables application programmers to
manage any resource exposed by the underlying OS, for
example cores or IO bandwidth. Additionally, we have
developed a domain-specific language for defining
high-level resource management policies, and a facility
for extending the kinds of resources that can be
managed with our language extension. Finally, through a
number of useful variations, our design offers a high
degree of composability. We evaluate Poli-C by way of
three case-studies: a scientific application, an image
processing webserver, and a pair of parallel database
join implementations. We found that using Poli-C yields
efficiency gains that require the addition of only a
few lines of code to applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Huang:2012:EPS,
author = "Jeff Huang and Charles Zhang",
title = "Execution privatization for scheduler-oblivious
concurrent programs",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "737--752",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384670",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Making multithreaded execution less non-deterministic
is a promising solution to address the difficulty of
concurrent programming plagued by the non-deterministic
thread scheduling. In fact, a vast category of
concurrent programs are scheduler-oblivious: their
execution is deterministic, regardless of the
scheduling behavior. We present and formally prove a
fundamental observation of the privatizability property
for scheduler-oblivious programs, which lays the
theoretical foundation for privatizing shared data
accesses on a path segment. With privatization, the
non-deterministic thread interleavings on the
privatized accesses are isolated and, as a consequence,
many concurrency problems are alleviated. We further
present a path and context sensitive privatization
algorithm that safely privatizes the program without
introducing any additional program behavior. Our
evaluation results show that the privatization
opportunity pervasively exists in real world large
complex concurrent systems. Through privatization,
several real concurrency bugs are fixed and notable
performance improvements are also achieved on
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Imam:2012:ITP,
author = "Shams M. Imam and Vivek Sarkar",
title = "Integrating task parallelism with actors",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "753--772",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384671",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces a unified concurrent programming
model combining the previously developed Actor Model
(AM) and the task-parallel Async-Finish Model (AFM).
With the advent of multi-core computers, there is a
renewed interest in programming models that can support
a wide range of parallel programming patterns. The
proposed unified model shows how the divide-and-conquer
approach of the AFM and the no-shared-mutable-state and
event-driven philosophy of the AM can be combined to
solve certain classes of problems more efficiently and
productively than either of the aforementioned models
individually. The unified model adds actor creation and
coordination to the AFM, while also enabling
parallelization within actors. This paper describes two
implementations of the unified model as extensions of
Habanero-Java and Habanero-Scala. The unified model
adds to the foundations of parallel programs, and to
the tools available for the programmer to aid in
productivity and performance while developing parallel
software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Kastner:2012:VAM,
author = "Christian K{\"a}stner and Klaus Ostermann and
Sebastian Erdweg",
title = "A variability-aware module system",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "773--792",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384673",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Module systems enable a divide and conquer strategy to
software development. To implement compile-time
variability in software product lines, modules can be
composed in different combinations. However, this way,
variability dictates a dominant decomposition. As an
alternative, we introduce a variability-aware module
system that supports compile-time variability inside a
module and its interface. So, each module can be
considered a product line that can be type checked in
isolation. Variability can crosscut multiple modules.
The module system breaks with the antimodular tradition
of a global variability model in product-line
development and provides a path toward software
ecosystems and product lines of product lines developed
in an open fashion. We discuss the design and
implementation of such a module system on a core
calculus and provide an implementation for C as part of
the TypeChef project. Our implementation supports
variability inside modules from {\tt \#ifdef}
preprocessor directives and variable linking at the
composition level. With our implementation, we type
check all configurations of all modules of the open
source product line Busybox with 811 compile-time
options, perform a linker check of all configurations,
and report found type and linker errors --- without
resorting to a brute-force strategy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Takikawa:2012:GTF,
author = "Asumu Takikawa and T. Stephen Strickland and Christos
Dimoulas and Sam Tobin-Hochstadt and Matthias
Felleisen",
title = "Gradual typing for first-class classes",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "793--810",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384674",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic type-checking and object-oriented programming
often go hand-in-hand; scripting languages such as
Python, Ruby, and JavaScript all embrace
object-oriented (OO) programming. When scripts written
in such languages grow and evolve into large programs,
the lack of a static type discipline reduces
maintainability. A programmer may thus wish to migrate
parts of such scripts to a sister language with a
static type system. Unfortunately, existing type
systems neither support the flexible OO composition
mechanisms found in scripting languages nor accommodate
sound interoperation with untyped code. In this paper,
we present the design of a gradual typing system that
supports sound interaction between statically- and
dynamically-typed units of class-based code. The type
system uses row polymorphism for classes and thus
supports mixin-based OO composition. To protect
migration of mixins from typed to untyped components,
the system employs a novel form of contracts that
partially seal classes. The design comes with a theorem
that guarantees the soundness of the type system even
in the presence of untyped components.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Tardieu:2012:CK,
author = "Olivier Tardieu and Nathaniel Nystrom and Igor
Peshansky and Vijay Saraswat",
title = "Constrained kinds",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "811--830",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384675",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern object-oriented languages such as X10 require a
rich framework for types capable of expressing both
value-dependency and genericity, and supporting
pluggable, domain-specific extensions. In earlier work,
we presented a framework for constrained types in
object-oriented languages, parametrized by an
underlying constraint system. Types are viewed as
formulas C{c}, where C is the name of a class or an
interface and c is a constraint on the immutable
instance state (the properties) of C. Constraint
systems are a very expressive framework for partial
information. Many (value-)dependent type systems for
object-oriented languages can be viewed as constrained
types. This paper extends the constrained types
approach to handle type-dependency (``genericity'').
The key idea is to introduce constrained kinds: in the
same way that constraints on values can be used to
define constrained types, constraints on types can
define constrained kinds. We develop a core programming
language with constrained kinds. Generic types are
supported by introducing type variables---literally,
variables with ``type'' Type---and permitting programs
to impose subtyping and equality constraints on such
variables. We formalize the type-checking rules and
establish soundness. While the language now intertwines
constraints on types and values, its type system
remains parametric in the choice of the value
constraint system (language and solver). We demonstrate
that constrained kinds are expressive and practical and
sketch possible extensions with a discussion of the
design and implementation of X10.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Cohen:2012:ET,
author = "Michael Cohen and Haitao Steve Zhu and Emgin Ezgi
Senem and Yu David Liu",
title = "Energy types",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "831--850",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384676",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a novel type system to promote and
facilitate energy-aware programming. Energy Types is
built upon a key insight into today's energy-efficient
systems and applications: despite the popular
perception that energy and power can only be described
in joules and watts, real-world energy management is
often based on discrete phases and modes, which in turn
can be reasoned about by type systems very effectively.
A phase characterizes a distinct pattern of program
workload, and a mode represents an energy state the
program is expected to execute in. This paper describes
a programming model where phases and modes can be
intuitively specified by programmers or inferred by the
compiler as type information. It demonstrates how a
type-based approach to reasoning about phases and modes
can help promote energy efficiency. The soundness of
our type system and the invariants related to
inter-phase and inter-mode interactions are rigorously
proved. Energy Types is implemented as the core of a
prototyped object-oriented language ET for smartphone
programming. Preliminary studies show ET can lead to
significant energy savings for Android Apps.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Wu:2012:EIS,
author = "Bo Wu and Zhijia Zhao and Xipeng Shen and Yunlian
Jiang and Yaoqing Gao and Raul Silvera",
title = "Exploiting inter-sequence correlations for program
behavior prediction",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "851--866",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384678",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Prediction of program dynamic behaviors is fundamental
to program optimizations, resource management, and
architecture reconfigurations. Most existing predictors
are based on locality of program behaviors, subject to
some inherent limitations. In this paper, we revisit
the design philosophy and systematically explore a
second source of clues: statistical correlations
between the behavior sequences of different program
entities. Concentrating on loops, we examine the
correlations' existence, strength, and values in
enhancing the design of program behavior predictors. It
creates the first taxonomy of program behavior sequence
patterns. It develops a new form of predictors, named
sequence predictors, to effectively translate the
correlations into large-scope, proactive predictions of
program behavior sequences. It demonstrates the
usefulness of the prediction in dynamic version
selection and loop importance estimation, showing 19\%
average speedup on a number of real-world utility
applications. By taking scope and timing of behavior
prediction as the first-order design objectives, the
new approach overcomes limitations of existing program
behavior predictors, opening up many new opportunities
for runtime optimizations at various layers of
computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Ausiello:2012:KCC,
author = "Giorgio Ausiello and Camil Demetrescu and Irene
Finocchi and Donatella Firmani",
title = "$k$-Calling context profiling",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "867--878",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384679",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Calling context trees are one of the most fundamental
data structures for representing the interprocedural
control flow of a program, providing valuable
information for program understanding and optimization.
Nodes of a calling context tree associate performance
metrics to whole distinct paths in the call graph
starting from the root function. However, no explicit
information is provided for detecting short hot
sequences of activations, which may be a better
optimization target in large modular programs where
groups of related functions are reused in many
different parts of the code. Furthermore, calling
context trees can grow prohibitively large in some
scenarios. Another classical approach, called edge
profiling, collects performance metrics for
caller-callee pairs in the call graph, allowing it to
detect hot paths of fixed length one. We study a
generalization of edge and context-sensitive profiles
by introducing a novel data structure called k-calling
context forest (k-CCF). Nodes in a k-CCF associate
performance metrics to paths of length at most k that
lead to each distinct routine of the program, providing
edge profiles for k=1, full context-sensitive profiles
for k equal to infinity, as well as any other
intermediate point in the spectrum. We study the
properties of the k-CCF both theoretically and
experimentally on a large suite of prominent Linux
applications, showing how to construct it efficiently
and discussing its relationships with the calling
context tree. Our experiments show that the k-CCF can
provide effective space-accuracy tradeoffs for
interprocedural contextual profiling, yielding useful
clues to the hot spots of a program that may be hidden
in a calling context tree and using less space for
small values of k, which appear to be the most
interesting in practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
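A minimal sketch of the k-CCF idea from the abstract: attribute each
routine entry to the path of at most k innermost callers, so k=1 reduces
to edge profiling and larger k approaches full calling contexts. The
instrumentation below is a hand-rolled toy trace, not the paper's
profiler.

    from collections import Counter, deque

    K = 2
    stack = deque()    # current call stack (routine names)
    kccf = Counter()   # path of length <= K ending at a routine -> count

    def enter(fn):
        stack.append(fn)
        path = tuple(stack)[-K:]   # at most K innermost frames, ending at fn
        kccf[path] += 1

    def leave():
        stack.pop()

    # Simulate main -> a -> b, then main -> b.
    for event in [("enter", "main"), ("enter", "a"), ("enter", "b"),
                  ("leave",), ("leave",), ("enter", "b"), ("leave",)]:
        if event[0] == "enter":
            enter(event[1])
        else:
            leave()

    for path, n in sorted(kccf.items()):
        print(" -> ".join(path), n)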
@Article{Huang:2012:RRC,
author = "Wei Huang and Ana Milanova and Werner Dietl and
Michael D. Ernst",
title = "{Reim \& ReImInfer}: checking and inference of
reference immutability and method purity",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "879--896",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384680",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reference immutability ensures that a reference is not
used to modify the referenced object, and enables the
safe sharing of object structures. A pure method does
not cause side-effects on the objects that existed in
the pre-state of the method execution. Checking and
inference of reference immutability and method purity
enables a variety of program analyses and
optimizations. We present ReIm, a type system for
reference immutability, and ReImInfer, a corresponding
type inference analysis. The type system is concise and
context-sensitive. The type inference analysis is
precise and scalable, and requires no manual
annotations. In addition, we present a novel
application of the reference immutability type system:
method purity inference. To support our theoretical
results, we implemented the type system and the type
inference analysis for Java. We include a type checker
to verify the correctness of the inference result.
Empirical results on Java applications and libraries of
up to 348kLOC show that our approach achieves both
scalability and precision.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Bao:2012:WBS,
author = "Tao Bao and Yunhui Zheng and Xiangyu Zhang",
title = "White box sampling in uncertain data processing
enabled by program analysis",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "897--914",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384681",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Sampling is a very important and low-cost approach to
uncertain data processing, in which output variations
caused by input errors are sampled. Traditional methods
tend to treat a program as a blackbox. In this paper,
we show that through program analysis, we can expose
the internals of sample executions so that the process
can become more selective and focused. In particular,
we develop a sampling runtime that can selectively
sample within input error bounds to expose discontinuity in
output functions. It identifies all the program factors
that can potentially lead to discontinuity and hashes the
values of such factors during execution in a
cost-effective way. The hash values are used to guide
the sampling process. Our results show that the
technique is very effective for real-world programs. It
can achieve the precision of a high sampling rate with
the cost of a lower sampling rate.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
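The guidance scheme the abstract describes, hashing the
discontinuity-inducing factors observed in each sample run and refining
only between neighboring samples whose hashes differ, can be sketched as
follows. The function f and its single branch factor are assumptions for
illustration, not drawn from the paper's benchmarks.

    def f(x):
        return x * 2 if x < 0.5 else x * 2 + 10   # discontinuity at 0.5

    def run(x):
        factors = (x < 0.5,)    # factors that gate control flow
        return f(x), hash(factors)

    def sample(lo, hi, n, depth=2):
        xs = [lo + (hi - lo) * i / (n - 1) for i in range(n)]
        pts = [(x, run(x)) for x in xs]
        if depth == 0:
            return pts
        out = []
        for (x0, (y0, h0)), (x1, (y1, h1)) in zip(pts, pts[1:]):
            out.append((x0, (y0, h0)))
            if h0 != h1:   # hashes differ: the output may jump here
                out.extend(sample(x0, x1, n, depth - 1)[1:-1])
        out.append(pts[-1])
        return out

    for x, (y, _) in sample(0.0, 1.0, 5):
        print(f"{x:.4f} -> {y}")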
@Article{Lucas:2012:DPM,
author = "Charles Lucas and Sebastian Elbaum and David S.
Rosenblum",
title = "Detecting problematic message sequences and
frequencies in distributed systems",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "915--926",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384683",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Testing the components of a distributed system is
challenging as it requires consideration of not just
the state of a component, but also the sequence of
messages it may receive from the rest of the system or
the environment. Such messages may vary in type and
content, and more particularly, in the frequency at
which they are generated. All of these factors, in the
right combination, may lead to faulty behavior. In this
paper we present an approach to address these
challenges by systematically analyzing a component in a
distributed system to identify specific message
sequences and frequencies at which a failure can occur.
At the core of the analysis is the generation of a test
driver that defines the space of message sequences to
be generated, the exploration of that space through the
use of dynamic symbolic execution, and the timing and
analysis of the generated tests to identify problematic
frequencies. We implemented our approach in the context
of the popular Robot Operating System and
investigated its application to three systems of
increasing complexity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Gu:2012:RDK,
author = "Zhongxian Gu and Earl T. Barr and Drew Schleck and
Zhendong Su",
title = "Reusing debugging knowledge via trace-based bug
search",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "927--942",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384684",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Some bugs, among the millions that exist, are similar
to each other. One bug-fixing tactic is to search for
similar bugs that have been reported and resolved in
the past. A fix for a similar bug can help a developer
understand a bug, or even directly fix it. Studying
bugs with similar symptoms, programmers may determine
how to detect or resolve them. To speed debugging, we
advocate the systematic capture and reuse of debugging
knowledge, much of which is currently wasted. The core
challenge here is how to search for similar bugs. To
tackle this problem, we exploit semantic bug
information in the form of execution traces, which
precisely capture bug semantics. This paper introduces
novel tool and language support for semantically
querying and analyzing bugs. We describe OSCILLOSCOPE,
an Eclipse plugin, that uses a bug trace to
exhaustively search its database for similar bugs and
return their bug reports. OSCILLOSCOPE displays the
traces of the bugs it returns against the trace of the
target bug, so a developer can visually examine the
quality of the matches. OSCILLOSCOPE rests on our bug
query language (BQL), a flexible query language over
traces. To realize OSCILLOSCOPE, we developed an open
infrastructure that consists of a trace collection
engine, BQL, a Hadoop-based query engine for BQL, a
trace-indexed bug database, as well as a web-based
frontend. OSCILLOSCOPE records and uploads bug traces
to its infrastructure; it does so automatically when a
JUnit test fails. We evaluated OSCILLOSCOPE on bugs
collected from popular open-source projects. We show
that OSCILLOSCOPE accurately and efficiently finds
similar bugs, some of which could have been immediately
used to fix open bugs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Strickland:2012:CIR,
author = "T. Stephen Strickland and Sam Tobin-Hochstadt and
Robert Bruce Findler and Matthew Flatt",
title = "Chaperones and impersonators: run-time support for
reasonable interposition",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "943--962",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384685",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Chaperones and impersonators provide run-time support
for interposing on primitive operations such as
function calls, array access and update, and structure
field access and update. Unlike most interposition
support, chaperones and impersonators are restricted so
that they constrain the behavior of the interposing
code to reasonable interposition, which in practice
preserves the abstraction mechanisms and reasoning that
programmers and compiler analyses rely on. Chaperones
and impersonators are particularly useful for
implementing contracts, and our implementation in
Racket allows us to improve both the expressiveness and
the performance of Racket's contract system.
Specifically, contracts on mutable data can be enforced
without changing the API to that data; contracts on
large data structures can be checked lazily on only the
accessed parts of the structure; contracts on objects
and classes can be implemented with lower overhead; and
contract wrappers can preserve object equality where
appropriate. With this extension, gradual typing
systems, such as Typed Racket, that rely on contracts
for interoperation with untyped code can now pass
mutable values safely between typed and untyped
modules.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
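The restriction that distinguishes chaperones from arbitrary
interposition, interposing code may observe arguments and results and may
signal an error, but may not change the underlying operation's result, can
be approximated in a short sketch. The chaperone helper and its identity
check below are a hand-rolled analogue, not Racket's chaperone API.

    def chaperone(fn, pre=None, post=None):
        # Interposition wrapper: pre/post may inspect or raise, never alter.
        def wrapped(*args):
            if pre is not None:
                pre(*args)
            result = fn(*args)
            if post is not None and post(result) is not result:
                raise RuntimeError("chaperone must preserve the result")
            return result
        return wrapped

    def require_positive(x):
        if x <= 0:
            raise ValueError("contract violation: expected a positive number")

    half = chaperone(lambda x: x / 2, pre=require_positive, post=lambda r: r)
    print(half(8))   # 4.0, checked on the way in
    try:
        half(-1)     # rejected before the underlying call runs
    except ValueError as e:
        print(e)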
@Article{Solodkyy:2012:OET,
author = "Yuriy Solodkyy and Gabriel {Dos Reis} and Bjarne
Stroustrup",
title = "Open and efficient type switch for {C++}",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "963--982",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384686",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Selecting operations based on the run-time type of an
object is key to many object-oriented and functional
programming techniques. We present a technique for
implementing open and efficient type switching on
hierarchical extensible data types. The technique is
general and copes well with C++ multiple inheritance.
To simplify experimentation and gain realistic
performance using production-quality compilers and tool
chains, we implement a type switch construct as an ISO
C++11 library, called Mach7. This library-only
implementation provides concise notation and
outperforms the visitor design pattern, commonly used
for case analysis on types in object-oriented
programming. For closed sets of types, its performance
roughly equals equivalent code in functional languages,
such as OCaml and Haskell. The type-switching code is
easier to use and is more expressive than hand-coded
visitors are. The library is non-intrusive and
circumvents most of the extensibility restrictions
typical of the visitor design pattern. It was motivated
by applications involving large, typed, abstract syntax
trees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
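As a neutral illustration of the case analysis the abstract describes,
selecting an operation from the run-time type of an object, here is the
same shape in Python's match statement (3.10+); nothing below mirrors
Mach7's C++ notation or its performance claims.

    class Expr: pass

    class Num(Expr):
        def __init__(self, v):
            self.v = v

    class Add(Expr):
        def __init__(self, left, right):
            self.left, self.right = left, right

    def eval_expr(e):
        # A "type switch": dispatch on the run-time class of e, openly
        # extensible by adding cases, with no visitor boilerplate.
        match e:
            case Num():
                return e.v
            case Add():
                return eval_expr(e.left) + eval_expr(e.right)
            case _:
                raise TypeError(f"unhandled node: {e!r}")

    print(eval_expr(Add(Num(1), Add(Num(2), Num(3)))))   # 6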
@Article{Tamayo:2012:UBD,
author = "Juan M. Tamayo and Alex Aiken and Nathan Bronson and
Mooly Sagiv",
title = "Understanding the behavior of database operations
under program control",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "983--996",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384688",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Applications that combine general program logic with
persistent databases (e.g., three-tier applications)
often suffer large performance penalties from poor use
of the database. We introduce a program analysis
technique that combines information flow in the program
with commutativity analysis of its database operations
to produce a unified dependency graph for database
statements, which provides programmers with a
high-level view of how costly database operations are
and how they are connected in the program. As an
example application of our analysis we describe three
optimizations that can be discovered by examining the
structure of the dependency graph; each helps remove
communication latency from the critical path of a
multi-tier system. We implement our technique in a tool
for Java applications using JDBC and experimentally
validate it using the multi-tier component of the
Dacapo benchmark.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Mishne:2012:TBS,
author = "Alon Mishne and Sharon Shoham and Eran Yahav",
title = "Typestate-based semantic code search over partial
programs",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "997--1016",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384689",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a novel code search approach for answering
queries focused on API-usage with code showing how the
API should be used. To construct a search index, we
develop new techniques for statically mining and
consolidating temporal API specifications from code
snippets. In contrast to existing semantic-based
techniques, our approach handles partial programs in
the form of code snippets. Handling snippets allows us
to consume code from various sources such as parts of
open source projects, educational resources (e.g.
tutorials), and expert code sites. To handle code
snippets, our approach (i) extracts a possibly partial
temporal specification from each snippet using a
relatively precise static analysis tracking a
generalized notion of typestate, and (ii) consolidates
the partial temporal specifications, combining
consistent partial information to yield consolidated
temporal specifications, each of which captures a
full(er) usage scenario. To answer a search query, we
define a notion of relaxed inclusion that matches a query
against temporal specifications and their corresponding
code snippets. We have implemented our approach in a
tool called PRIME and applied it to search for API
usage of several challenging APIs. PRIME was able to
analyze and consolidate thousands of snippets per
tested API, and our results indicate that the
combination of a relatively precise analysis and
consolidation allowed PRIME to answer challenging
queries effectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{Xu:2012:FRD,
author = "Guoqing Xu",
title = "Finding reusable data structures",
journal = j-SIGPLAN,
volume = "47",
number = "10",
pages = "1017--1034",
month = oct,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398857.2384690",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:23 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A big source of run-time performance problems in
large-scale, object-oriented applications is the
frequent creation of data structures (by the same
allocation site) whose lifetimes are disjoint, and
whose shapes and data content are always the same.
Constructing these data structures and computing the
same data values many times is expensive; significant
performance improvements can be achieved by reusing
their instances, shapes, and/or data values rather than
reconstructing them. This paper presents a run-time
technique that can be used to help programmers find
allocation sites that create such data structures to
improve performance. At the heart of the technique are
three reusability definitions and novel summarization
approaches that compute summaries for data structures
based on these definitions. The computed summaries are
used subsequently to find data structures that have
disjoint lifetimes, and/or that have the same shapes
and content. We have implemented this technique in the
Jikes RVM and performed extensive studies on
large-scale, real-world programs. We describe our
experience using six case studies, in which we have
achieved large performance gains by fixing problems
reported by our tool.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '12 conference proceedings.",
}
@Article{OCallahan:2012:WYW,
author = "Robert O'Callahan",
title = "Why is your {Web} browser using so much memory?",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "1--2",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2258998",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Browsers are the operating systems of the Web. They
support a vast universe of applications written in a
modern garbage-collected programming language. Browsers
expose a rich platform API mostly implemented in C++.
Browsers are also consumer software with low switching
costs in an intensely competitive market. Thus in
addition to standard requirements such as maximizing
throughput and minimizing latency, browsers have to
consider issues like the following: when the user closes a
window while watching Task Manager, they want to see memory
usage go down. Browsers have to compete to minimize
memory usage even for poorly written applications. In
this talk I will elucidate these requirements and
describe how Firefox and other browsers address them. I
will pay particular attention to issues that we don't
know how to solve, and that could benefit from research
attention.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhou:2012:MMM,
author = "Jin Zhou and Brian Demsky",
title = "Memory management for many-core processors with
software configurable locality policies",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "3--14",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259000",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "As processors evolve towards higher core counts,
architects will develop more sophisticated memory
systems to satisfy the cores' increasing thirst for
memory bandwidth. Early many-core processor designs
suggest that future memory systems will likely include
multiple controllers and distributed cache coherence
protocols. Many-core processors that expose memory
locality policies to the software system provide
opportunities for automatic tuning that can achieve
significant performance benefits. Managed languages
typically provide a simple heap abstraction. This paper
presents techniques that bridge the gap between the
simple heap abstraction of modern languages and the
complicated memory systems of future processors. We
present a NUMA-aware approach to garbage collection
that balances the competing concerns of data locality
and heap utilization to improve performance. We combine
a lightweight approach for measuring an application's
memory behavior with an online, adaptive algorithm for
tuning the cache to optimize it for the specific
application's behaviors. We have implemented our
garbage collector and cache tuning algorithm and
present results on a 64-core TILEPro64 processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lyberis:2012:MMA,
author = "Spyros Lyberis and Polyvios Pratikakis and Dimitrios
S. Nikolopoulos and Martin Schulz and Todd Gamblin and
Bronis R. de Supinski",
title = "The {Myrmics} memory allocator: hierarchical,
message-passing allocation for global address spaces",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "15--24",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259001",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Constantly increasing hardware parallelism poses more
and more challenges to programmers and language
designers. One approach to harness the massive
parallelism is to move to task-based programming models
that rely on runtime systems for dependency analysis
and scheduling. Such models generally benefit from the
existence of a global address space. This paper
presents the parallel memory allocator of the Myrmics
runtime system, in which multiple allocator instances
organized in a tree hierarchy cooperate to implement a
global address space with dynamic region support on
distributed memory machines. The Myrmics hierarchical
memory allocator is a step towards improved productivity
and performance in parallel programming. Productivity
is improved through the use of dynamic regions in a
global address space, which provide a convenient shared
memory abstraction for dynamic and irregular data
structures. Performance is improved through scaling on
manycore systems without system-wide cache coherency.
We evaluate the stand-alone allocator on an MPI-based
x86 cluster and find that it scales well for up to 512
worker cores, while it can outperform Unified Parallel
C by a factor of 3.7--10.7x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Maas:2012:GOO,
author = "Martin Maas and Philip Reames and Jeffrey Morlan and
Krste Asanovi{\'c} and Anthony D. Joseph and John
Kubiatowicz",
title = "{GPUs} as an opportunity for offloading garbage
collection",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "25--36",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259002",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "GPUs have become part of most commodity systems.
Nonetheless, they are often underutilized when not
executing graphics-intensive or special-purpose
numerical computations, which are rare in consumer
workloads. Emerging architectures, such as integrated
CPU/GPU combinations, may create an opportunity to
utilize these otherwise unused cycles for offloading
traditional systems tasks. Garbage collection appears
to be a particularly promising candidate for
offloading, due to the popularity of managed languages
on consumer devices. We investigate the challenges for
offloading garbage collection to a GPU, by examining
the performance trade-offs for the mark phase of a mark
\& sweep garbage collector. We present a theoretical
analysis and an algorithm that demonstrates the
feasibility of this approach. We also discuss a number
of algorithmic design trade-offs required to leverage
the strengths and capabilities of the GPU hardware. Our
algorithm has been integrated into the Jikes RVM and we
present promising performance results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
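The mark phase the paper offloads is, at its core, a reachability
traversal over the object graph. A sequential Haskell sketch of that
baseline (Heap and mark are illustrative names; the paper's
contribution is restructuring this traversal for GPU parallelism):

    import qualified Data.IntMap.Strict as IM
    import qualified Data.IntSet as IS

    -- object id -> ids of the objects it references
    type Heap = IM.IntMap [Int]

    -- Worklist marking: everything reachable from the roots is live.
    mark :: Heap -> [Int] -> IS.IntSet
    mark heap = go IS.empty
      where
        go marked []       = marked
        go marked (o : os)
          | o `IS.member` marked = go marked os
          | otherwise            =
              go (IS.insert o marked)
                 (IM.findWithDefault [] o heap ++ os)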
@Article{Yang:2012:BRF,
author = "Xi Yang and Stephen M. Blackburn and Daniel Frampton
and Antony L. Hosking",
title = "Barriers reconsidered, friendlier still!",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "37--48",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259004",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Read and write barriers mediate access to the heap
allowing the collector to control and monitor mutator
actions. For this reason, barriers are a powerful tool
in the design of any heap management algorithm, but the
prevailing wisdom is that they impose significant
costs. However, changes in hardware and workloads make
these costs a moving target. Here, we measure the cost
of a range of useful barriers on a range of modern
hardware and workloads. We confirm some old results and
overturn others. We evaluate the microarchitectural
sensitivity of barrier performance and the differences
among benchmark suites. We also consider barriers in
context, focusing on their behavior when used in
combination, and investigate a known pathology and
evaluate solutions. Our results show that read and
write barriers have average overheads as low as 5.4\%
and 0.9\% respectively. We find that barrier overheads
are more exposed on the workload provided by the modern
DaCapo benchmarks than on old SPECjvm98 benchmarks.
Moreover, there are differences in barrier behavior
between in-order and out-of-order machines, and their
respective memory subsystems, which indicate different
barrier choices for different platforms. These changing
costs mean that algorithm designers need to reconsider
their design choices and the nature of their resulting
algorithms in order to exploit the opportunities
presented by modern hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sivaramakrishnan:2012:ERB,
author = "KC Sivaramakrishnan and Lukasz Ziarek and Suresh
Jagannathan",
title = "Eliminating read barriers through procrastination and
cleanliness",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "49--60",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259005",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Managed languages typically use read barriers to
interpret forwarding pointers introduced to keep track
of copied objects. For example, in a multicore
environment with thread-local heaps and a global,
shared heap, an object initially allocated on a local
heap may be copied to a shared heap if it becomes the
source of a store operation whose target location
resides on the shared heap. As part of the copy
operation, a forwarding pointer may be established in
the original object to point to the copied object. This
level of indirection avoids the need to update all of
the references to the object that has been copied. In
this paper, we consider the design of a managed runtime
that eliminates read barriers. Our design is premised
on the availability of a sufficient degree of
concurrency to stall operations that would otherwise
necessitate the copy. Stalled actions are deferred
until the next local collection, avoiding exposing
forwarding pointers to the mutator. In certain
important cases, procrastination is unnecessary ---
lightweight runtime techniques can sometimes be used to
allow objects to be eagerly copied when their set of
incoming references is known, or when it can be
determined that having multiple copies would not
violate program semantics. We evaluate our techniques
on 3 platforms: a 16-core AMD64 machine, a 48-core
Intel SCC, and an 864-core Azul Vega 3. Experimental
results over a range of parallel benchmarks indicate
that our approach leads to notable performance gains
(20--32\% on average) without incurring any
additional complexity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Iyengar:2012:SCP,
author = "Balaji Iyengar and Edward Gehringer and Michael Wolf
and Karthikeyan Manivannan",
title = "Scalable concurrent and parallel mark",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "61--72",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259006",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Parallel marking algorithms use multiple threads to
walk through the object heap graph and mark each
reachable object as live. Parallel marker threads mark
an object ``live'' by atomically setting a bit in a
mark-bitmap or a bit in the object header. Most of
these parallel algorithms strive to improve the marking
throughput by using work-stealing algorithms for
load-balancing and to ensure that all participating
threads are kept busy. A purely ``processor-centric''
load-balancing approach, in conjunction with the need to
atomically set the mark bit, results in significant
contention during parallel marking. This limits the
scalability and throughput of parallel marking
algorithms. We describe a new non-blocking, lock-free
work-sharing algorithm whose primary goal is to reduce
contention during atomic updates of the mark-bitmap by
parallel task-threads. Our work-sharing
mechanism uses the address of a word in the mark-bitmap
as the key to stripe work among parallel task-threads,
with only a subset of the task-threads working on each
stripe. This filters out most of the contention during
parallel marking, yielding 20\% performance improvements.
In case of concurrent and on-the-fly collector
algorithms, mutator threads also generate marking-work
for the marking task-threads. In these schemes, mutator
threads are also provided with thread-local marking
stacks where they collect references to potentially
``gray'' objects, i.e., objects that haven't been
``marked-through'' by the collector. We note that since
this work is generated by mutators when they reference
these objects, there is a high likelihood that these
objects continue to be present in the processor cache.
We describe and evaluate a scheme to distribute mutator
generated marking work among the collector's
task-threads that is cognizant of the processor and
cache topology. We prototype both our algorithms within
the C4 [28] collector that ships as part of an
industrial-strength JVM for the Linux-x86 platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shahriyar:2012:CGR,
author = "Rifat Shahriyar and Stephen M. Blackburn and Daniel
Frampton",
title = "Down for the count? {Getting} reference counting back
in the ring",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "73--84",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259008",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Reference counting and tracing are the two fundamental
approaches that have underpinned garbage collection
since 1960. However, despite some compelling
advantages, reference counting is almost completely
ignored in implementations of high performance systems
today. In this paper we take a detailed look at
reference counting to understand its behavior and to
improve its performance. We identify key design choices
for reference counting and analyze how the behavior of
a wide range of benchmarks might affect design
decisions. As far as we are aware, this is the first
such quantitative study of reference counting. We use
insights gleaned from this analysis to introduce a
number of optimizations that significantly improve the
performance of reference counting. We find that an
existing modern implementation of reference counting
has an average 30\% overhead compared to tracing, and
that in combination, our optimizations are able to
completely eliminate that overhead. This brings the
performance of reference counting on par with that of a
well tuned mark-sweep collector. We keep our in-depth
analysis of reference counting as general as possible
so that it may be useful to other garbage collector
implementers. Our finding that reference counting can
be made directly competitive with well tuned mark-sweep
should shake the community's prejudices about reference
counting and perhaps open new opportunities for
exploiting reference counting's strengths, such as
localization and immediacy of reclamation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
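The overhead analysed in the paper comes from the counter updates a
naive reference-counting collector performs on every pointer store. A
toy Haskell model of that baseline, with immediate reclamation when a
count reaches zero (illustrative names, not the paper's optimised
scheme, which defers and coalesces these updates):

    import Data.IORef

    data Obj = Obj { refCount :: IORef Int, objName :: String }

    newObj :: String -> IO Obj
    newObj n = do
      c <- newIORef 1          -- one initial reference
      pure (Obj c n)

    incRef :: Obj -> IO ()
    incRef o = modifyIORef' (refCount o) (+ 1)

    decRef :: Obj -> IO ()
    decRef o = do
      c <- subtract 1 <$> readIORef (refCount o)
      writeIORef (refCount o) c
      if c == 0
        then putStrLn ("reclaim " ++ objName o)  -- immediate reclamation
        else pure ()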
@Article{Iyengar:2012:CWF,
author = "Balaji Iyengar and Gil Tene and Michael Wolf and
Edward Gehringer",
title = "The {Collie}: a wait-free compacting collector",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "85--96",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259009",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "We describe the Collie collector, a fully concurrent
compacting collector that uses transactional memory
techniques to achieve wait-free compaction. The
collector uses compaction as the primary means of
reclaiming unused memory, and performs ``individual
object transplantations'' as transactions. We introduce
new terms and requirements useful for analyzing
concurrent relocating collectors, including definitions
of referrer sets, object transplantation and the notion
of individually transplantable objects. The Collie
collector builds on these terms and on a detailed
analysis of an object's legal states during compaction.
Collie uses a combination of read barriers, write
barriers and transactional memory operations. Its
read-barrier supports fast, direct object referencing
while using a bounded, constant-time, wait-free
triggering path. Collie thereby avoids the constant
indirection cost of Brooks-style barriers [9] or
handle-based heaps [25]. Collie is demonstrated using
speculative multi-address atomicity [11], a form of
hardware transactional memory supported by the Azul
Vega architecture [2]. We evaluate the Collie collector
on the Azul platform, on which previous concurrent
collectors such as the Pauseless Collector [12] and its
generational variant [30] have been commercially
available for several years. We discuss Collie's
performance while running sustained workloads, and
compare it to the Pauseless collector on the same
platform. The Collie collector provides significant MMU
[5] improvements even in the 1-msec time windows
compared to the Pauseless collector. At the same time,
it matches Pauseless in throughput and in the ability
to scale to large heap sizes. We believe that the
Collie collector is the first garbage collector to
leverage hardware-assisted transactional memory. While
Collie directly leverages Vega's speculative
multi-address atomicity feature (SMA) [11], its design
can be easily adapted to other hardware-assisted
transactional memory systems. Specifically, the
upcoming Intel TSX instruction set extensions [21]
include capabilities similar to SMA. We expect Collie
to be easily implementable on future commodity servers
based on Intel Haswell processors and following
processor generations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sewe:2012:NSI,
author = "Andreas Sewe and Mira Mezini and Aibek Sarimbekov and
Danilo Ansaloni and Walter Binder and Nathan Ricci and
Samuel Z. Guyer",
title = "{{\tt New Scala() instanceof}} {Java}: a comparison of
the memory behaviour of {Java} and {Scala} programs",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "97--108",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259010",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "While often designed with a single language in mind,
managed runtimes like the Java virtual machine (JVM)
have become the target of not one but many languages,
all of which benefit from the runtime's services. One
of these services is automatic memory management. In
this paper, we compare and contrast the memory
behaviour of programs written in Java and Scala,
respectively, two languages which both target the same
platform: the JVM. We analyze both core object
demographics, such as object lifetimes, and secondary
properties of objects, such as their associated monitors
and identity hash codes. We find that objects in Scala
programs have lower survival rates and higher rates of
immutability, which is only partly explained by the
memory behaviour of objects representing closures or
boxed primitives. Other metrics vary more by benchmark
than language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gu:2012:GTC,
author = "Xiaoming Gu and Chen Ding",
title = "A generalized theory of collaborative caching",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "109--120",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259012",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Collaborative caching allows software to use hints to
influence cache management in hardware. Previous
theories have shown that such hints observe the
inclusion property and can obtain optimal caching if
the access sequence and the cache size are known ahead
of time. Previously, the interface of a cache hint was
limited, e.g., to a binary choice between LRU and MRU. In
this paper, we generalize the hint interface, where a
hint is a number encoding a priority. We show the
generality in a hierarchical relation where
collaborative caching subsumes non-collaborative
caching, and within collaborative caching, the priority
hint subsumes the previous binary hint. We show two
theoretical results for the general hint. The first is
a new cache replacement policy, priority LRU, which
permits the complete range of choices between MRU and
LRU. We prove a new type of inclusion
property---non-uniform inclusion---and give a one-pass
algorithm to compute the miss rate for all cache sizes.
Second, we show that priority hints make it possible to
use the same hints to obtain optimal caching for all
cache sizes, without knowing the cache size beforehand.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
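To make the generalized hint interface concrete, here is a toy Haskell
cache simulator in which every access carries a numeric priority and
the eviction victim is the resident block with the lowest hint. This
is a deliberate simplification (the paper's priority LRU also accounts
for recency); names are illustrative:

    import qualified Data.Map.Strict as M
    import Data.List (minimumBy)
    import Data.Ord (comparing)

    -- resident block -> its most recent priority hint
    type Cache k = M.Map k Int

    access :: Ord k => Int -> Cache k -> (k, Int) -> Cache k
    access capacity c (k, prio)
      | M.member k c        = M.insert k prio c
      | M.size c < capacity = M.insert k prio c
      | otherwise           = M.insert k prio (M.delete victim c)
      where
        victim = fst (minimumBy (comparing snd) (M.toList c))

    -- foldl (access 2) M.empty [("a",3),("b",1),("c",2)] evicts "b".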
@Article{Nasre:2012:ESC,
author = "Rupesh Nasre",
title = "Exploiting the structure of the constraint graph for
efficient points-to analysis",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "121--132",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259013",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Points-to analysis is a key compiler analysis. Several
memory related optimizations use points-to information
to improve their effectiveness. Points-to analysis is
performed by building a constraint graph of pointer
variables and dynamically updating it to propagate more
and more points-to information across its subset edges.
So far, the structure of the constraint graph has been
only trivially exploited for efficient propagation of
information, e.g., in identifying cyclic components or
to propagate information in topological order. We
perform a careful study of its structure and propose a
new inclusion-based flow-insensitive context-sensitive
points-to analysis algorithm based on the notion of
dominant pointers. We also propose a new kind of
pointer-equivalence based on dominant pointers which
provides significantly more opportunities for reducing
the number of pointers tracked during the analysis.
Based on this hitherto unexplored form of
pointer-equivalence, we develop a new context-sensitive
flow-insensitive points-to analysis algorithm which
uses incremental dominator update to efficiently
compute points-to information. Using a large suite of
programs consisting of SPEC 2000 benchmarks and five
large open source programs we show that our points-to
analysis is 88\% faster than BDD-based Lazy Cycle
Detection and $ 2 \times $ faster than Deep
Propagation. We argue that our approach of detecting
dominator-based pointer-equivalence is a key to improve
points-to analysis efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
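The baseline being accelerated is inclusion-based (Andersen-style)
propagation over the constraint graph. A toy Haskell fixed-point
sketch, with pointer variables as strings and subset edges as pairs
(illustrative only; it ignores the context sensitivity and cycle
handling discussed in the abstract):

    import qualified Data.Map.Strict as M
    import qualified Data.Set as S

    -- points-to set of each pointer variable
    type PtsTo = M.Map String (S.Set String)

    -- An edge (src, dst) means pts(dst) must include pts(src);
    -- propagate until nothing changes.
    propagate :: [(String, String)] -> PtsTo -> PtsTo
    propagate edges pts
      | pts' == pts = pts
      | otherwise   = propagate edges pts'
      where
        pts' = foldl step pts edges
        step m (src, dst) =
          M.insertWith S.union dst (M.findWithDefault S.empty src m) m

    -- propagate [("p","q")] (M.fromList [("p", S.singleton "a")])
    -- yields a map in which both p and q point to a.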
@Article{Inoue:2012:ISC,
author = "Hiroshi Inoue and Toshio Nakatani",
title = "Identifying the sources of cache misses in {Java}
programs without relying on hardware counters",
journal = j-SIGPLAN,
volume = "47",
number = "11",
pages = "133--142",
month = nov,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2426642.2259014",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jan 10 08:55:30 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '12 conference proceedings.",
abstract = "Cache miss stalls are one of the major sources of
performance bottlenecks for multicore processors. A
Hardware Performance Monitor (HPM) in the processor is
useful for locating the cache misses, but is rarely
used in the real world for various reasons. It would be
better to find a simple approach to locate the sources
of cache misses and apply runtime optimizations without
relying on an HPM. This paper shows that pointer
dereferencing in hot loops is a major source of cache
misses in Java programs. Based on this observation, we
devised a new approach to identify the instructions and
objects that cause frequent cache misses. Our heuristic
technique effectively identifies the majority of the
cache misses in typical Java programs by matching the
hot loops to simple idiomatic code patterns. On
average, our technique selected only 2.8\% of the load
and store instructions generated by the JIT compiler
and these instructions accounted for 47\% of the L1D
cache misses and 49\% of the L2 cache misses caused by
the JIT-compiled code. To prove the effectiveness of
our technique in compiler optimizations, we prototyped
object placement optimizations, which align objects in
cache lines or collocate paired objects in the same
cache line to reduce cache misses. For comparison, we
also implemented the same optimizations based on the
accurate information obtained from the HPM. Our results
showed that our heuristic approach was as effective as
the HPM-based approach and achieved comparable
performance improvements in the {\tt SPECjbb2005} and
{\tt SPECpower\_ssj2008} benchmark programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Farmer:2012:HMP,
author = "Andrew Farmer and Andy Gill and Ed Komp and Neil
Sculthorpe",
title = "The {HERMIT} in the machine: a plugin for the
interactive transformation of {GHC} core language
programs",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "1--12",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364508",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "The importance of reasoning about and refactoring
programs is a central tenet of functional programming.
Yet our compilers and development toolchains only
provide rudimentary support for these tasks. This paper
introduces a programmatic and compiler-centric
interface that facilitates refactoring and equational
reasoning. To develop our ideas, we have implemented
HERMIT, a toolkit enabling informal but systematic
transformation of Haskell programs from inside the
Glasgow Haskell Compiler's optimization pipeline. With
HERMIT, users can experiment with optimizations and
equational reasoning, while the tedious heavy lifting
of performing the actual transformations is done for
them. HERMIT provides a transformation API that can be
used to build higher-level rewrite tools. One use-case
is prototyping new optimizations as clients of this API
before being committed to the GHC toolchain. We
describe a HERMIT application --- a read-eval-print
shell for performing transformations using HERMIT. We
also demonstrate using this shell to prototype an
optimization on a specific example, and report our
initial experiences and remaining challenges.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Adams:2012:TYB,
author = "Michael D. Adams and Thomas M. DuBuisson",
title = "Template your boilerplate: using {Template Haskell}
for efficient generic programming",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "13--24",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364509",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "Generic programming allows the concise expression of
algorithms that would otherwise require large amounts
of handwritten code. A number of such systems have been
developed over the years, but a common drawback of
these systems is poor runtime performance relative to
handwritten, non-generic code. Generic-programming
systems vary significantly in this regard, but few
consistently match the performance of handwritten code.
This poses a dilemma for developers.
Generic-programming systems offer concision but cost
performance. Handwritten code offers performance but
costs concision. This paper explores the use of
Template Haskell to achieve the best of both worlds. It
presents a generic-programming system for Haskell that
provides both the concision of other
generic-programming systems and the efficiency of
handwritten code. Our system gives the programmer a
high-level, generic-programming interface, but uses
Template Haskell to generate efficient, non-generic
code that outperforms existing generic-programming
systems for Haskell. This paper presents the results of
benchmarking our system against both handwritten code
and several other generic-programming systems. In these
benchmarks, our system matches the performance of
handwritten code while other systems average anywhere
from two to twenty times slower.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
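For contrast, the traversal style of the Scrap-Your-Boilerplate
library that the paper benchmarks against looks like this (the classic
``paradise'' example; the types and the raise function are
illustrative):

    {-# LANGUAGE DeriveDataTypeable #-}
    import Data.Generics (Data, everywhere, mkT)

    data Company   = Company [Dept]            deriving (Data, Show)
    data Dept      = Dept String Salary [Dept] deriving (Data, Show)
    newtype Salary = Salary Double             deriving (Data, Show)

    -- One traversal line instead of one clause per type: apply the
    -- salary transformation everywhere it type-checks.
    raise :: Double -> Company -> Company
    raise k = everywhere (mkT (\(Salary s) -> Salary (s * k)))

The paper's system keeps this one-line concision but uses Template
Haskell to generate the equivalent non-generic worker code at compile
time.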
@Article{Lippmeier:2012:GPA,
author = "Ben Lippmeier and Manuel Chakravarty and Gabriele
Keller and Simon Peyton Jones",
title = "Guiding parallel array fusion with indexed types",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "25--36",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364511",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "We present a refined approach to parallel array fusion
that uses indexed types to specify the internal
representation of each array. Our approach aids the
client programmer in reasoning about the performance of
their program in terms of the source code. It also
makes the intermediate code easier to transform at
compile-time, resulting in faster compilation and more
reliable runtimes. We demonstrate how our new approach
improves both the clarity and performance of several
end-user written programs, including a fluid flow
solver and an interpolator for volumetric data.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
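The distinction behind representation-indexed arrays can be sketched
with a two-representation model: a delayed array is just an extent
plus an indexing function, so successive maps fuse by composition, and
forcing it produces a manifest array. A minimal Haskell sketch (not
the actual Repa API):

    -- Delayed representation: an indexing function, no storage.
    data Delayed a = Delayed { extent :: Int, index :: Int -> a }

    mapD :: (a -> b) -> Delayed a -> Delayed b
    mapD f (Delayed n ix) = Delayed n (f . ix)

    -- A list stands in for a manifest (materialised) vector.
    force :: Delayed a -> [a]
    force (Delayed n ix) = map ix [0 .. n - 1]

    -- force (mapD g (mapD f arr)) indexes with (g . f . index arr):
    -- no intermediate array is ever built.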
@Article{Keller:2012:VA,
author = "Gabriele Keller and Manuel M. T. Chakravarty and Roman
Leshchinskiy and Ben Lippmeier and Simon Peyton Jones",
title = "Vectorisation avoidance",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "37--48",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364512",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "Flattening nested parallelism is a vectorising code
transform that converts irregular nested parallelism
into flat data parallelism. Although the result has
good asymptotic performance, flattening thoroughly
restructures the code. Many intermediate data
structures and traversals are introduced, which may or
may not be eliminated by subsequent optimisation. We
present a novel program analysis to identify parts of
the program where flattening would only introduce
overhead, without appropriate gain. We present
empirical evidence that avoiding vectorisation in these
cases leads to more efficient programs than if we had
applied vectorisation and then relied on array fusion
to eliminate intermediates from the resulting code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jeuring:2012:TTC,
author = "Johan Jeuring and Patrik Jansson and Cl{\'a}udio
Amaral",
title = "Testing type class laws",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "49--60",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364514",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "The specification of a class in Haskell often starts
with stating, in comments, the laws that should be
satisfied by methods defined in instances of the class,
followed by the type of the methods of the class. This
paper develops a framework that supports testing such
class laws using QuickCheck. Our framework is a
light-weight class law testing framework, which
requires a limited amount of work per class law, and
per datatype for which the class law is tested. We also
show how to test class laws with partially-defined
values. Using partially-defined values, we show that
the standard lazy and strict implementations of the
state monad do not satisfy the expected laws.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
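A minimal example of the kind of law test the framework automates,
written directly against QuickCheck: the functor composition law,
checked at a concrete instance (property name illustrative):

    import Test.QuickCheck

    -- fmap (f . g) == fmap f . fmap g, instantiated at [] and Int.
    prop_functorCompose :: [Int] -> Bool
    prop_functorCompose xs =
      fmap ((+ 1) . (* 2)) xs == (fmap (+ 1) . fmap (* 2)) xs

    main :: IO ()
    main = quickCheck prop_functorCompose

The paper's framework factors out this per-law, per-datatype
boilerplate and additionally checks laws on partially-defined values.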
@Article{Duregaard:2012:FFE,
author = "Jonas Dureg{\aa}rd and Patrik Jansson and Meng Wang",
title = "{Feat}: functional enumeration of algebraic types",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "61--72",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364515",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "In mathematics, an enumeration of a set S is a
bijective function from (an initial segment of) the
natural numbers to S. We define ``functional
enumerations'' as such bijections that are efficiently
computable. This paper describes a theory of functional
enumeration and provides an algebra of enumerations
closed under sums, products, guarded recursion and
bijections. We partition each enumerated set into
numbered, finite subsets. We provide a generic
enumeration such that the number of each part
corresponds to the size of its values (measured in the
number of constructors). We implement our ideas in a
Haskell library called testing-feat, and make the
source code freely available. Feat provides efficient
``random access'' to enumerated values. The primary
application is property-based testing, where it is used
to define both random sampling (for example QuickCheck
generators) and exhaustive enumeration (in the style of
SmallCheck). We claim that functional enumeration is
the best option for automatically generating test cases
from large groups of mutually recursive syntax tree
types. As a case study we use Feat to test the
pretty-printer of the Template Haskell library
(uncovering several bugs).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
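The heart of the approach can be sketched as a small algebra over
size-indexed parts. A toy Haskell version (names illustrative; the
real testing-feat library additionally indexes parts by cardinality to
give efficient random access):

    newtype Enumerate a = Enumerate { parts :: [[a]] }

    instance Functor Enumerate where
      fmap f (Enumerate ps) = Enumerate (map (map f) ps)

    single :: a -> Enumerate a            -- one value of size 0
    single x = Enumerate [[x]]

    pay :: Enumerate a -> Enumerate a     -- charge one constructor
    pay (Enumerate ps) = Enumerate ([] : ps)

    eplus :: Enumerate a -> Enumerate a -> Enumerate a
    eplus (Enumerate xs) (Enumerate ys) = Enumerate (merge xs ys)
      where merge (a:as) (b:bs) = (a ++ b) : merge as bs
            merge as     []     = as
            merge []     bs     = bs

    etimes :: Enumerate a -> Enumerate b -> Enumerate (a, b)
    etimes (Enumerate xs0) (Enumerate ys0) =
      Enumerate [ concat [ [ (a, b) | a <- xs !! i, b <- ys !! (k - i) ]
                         | i <- [0 .. k] ]
                | k <- [0 ..] ]
      where xs = xs0 ++ repeat []
            ys = ys0 ++ repeat []

    -- Binary trees sized by constructor count: parts trees !! 3
    -- is [Node Leaf Leaf], the single three-constructor tree.
    data Tree = Leaf | Node Tree Tree deriving Show

    trees :: Enumerate Tree
    trees = eplus (pay (single Leaf))
                  (pay (fmap (uncurry Node) (etimes trees trees)))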
@Article{Claessen:2012:SSF,
author = "Koen Claessen",
  title =        "Shrinking and showing functions (functional pearl)",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "73--80",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364516",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "Although quantification over functions in QuickCheck
properties has been supported from the beginning,
displaying and shrinking them as counter examples has
not. The reason is that in general, functions are
infinite objects, which means that there is no sensible
show function for them, and shrinking an infinite
object within a finite number of steps seems
impossible. This paper presents a general technique
with which functions as counter examples can be shrunk
to finite objects, which can then be displayed to the
user. The approach turns out to be practically usable,
which is shown by a number of examples. The two main
limitations are that higher-order functions cannot be
dealt with, and it is hard to deal with terms that
contain functions as subterms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
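This technique is what lets QuickCheck display and shrink functional
counterexamples. A small usage sketch, assuming a recent QuickCheck in
which the Fun wrapper and applyFun are available: the deliberately
false property below forces QuickCheck to exhibit a concrete, shrunk
function, printed as a finite case table.

    import Test.QuickCheck

    -- False: not every function is constant.
    prop_const :: Fun Int Int -> Bool
    prop_const f = applyFun f 0 == applyFun f 1

    main :: IO ()
    main = quickCheck prop_const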
@Article{Allen:2012:SDR,
author = "Wyatt Allen and Martin Erwig",
title = "{Surveyor}: a {DSEL} for representing and analyzing
strongly typed surveys",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "81--90",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "Polls and surveys are increasingly employed to gather
information about attitudes and experiences of all
kinds of populations and user groups. The ultimate
purpose of a survey is to identify trends and
relationships that can inform decision makers. To this
end, the data gathered by a survey must be
appropriately analyzed. Most existing tools focus on the
user-interface aspect of the data
collection task, but pay little attention to the
structure and type of the collected data, which are
usually represented as potentially tag-annotated, but
otherwise unstructured, plain text. This makes the task
of writing data analysis programs often difficult and
error-prone, whereas a typed data representation could
support the writing of type-directed data analysis
tools that would enjoy the many benefits of static
typing. In this paper we present Surveyor, a DSEL that
allows the compositional construction of typed surveys,
where the types describe the structure of the data to
be collected. A survey can be run to gather typed data,
which can then be subjected to analysis tools that are
built using Surveyor's typed combinators. Altogether
the Surveyor DSEL realizes a strongly typed and
type-directed approach to data gathering and analysis.
The implementation of our DSEL is based on GADTs to
allow a flexible, yet strongly typed representation of
surveys. Moreover, the implementation employs the
Scrap-Your-Boilerplate library to facilitate the
type-dependent traversal, extraction, and combination
of data gathered from surveys.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
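A stripped-down version of the idea: make the type of a survey track
the type of the data it collects, so analysis code is checked against
the survey's structure. The GADT below is illustrative, not Surveyor's
actual combinators:

    {-# LANGUAGE GADTs #-}

    data Survey a where
      AskText :: String -> Survey String
      AskInt  :: String -> Survey Int
      Both    :: Survey a -> Survey b -> Survey (a, b)

    -- Running a survey yields data of exactly the advertised type.
    run :: Survey a -> IO a
    run (AskText q) = putStrLn q >> getLine
    run (AskInt q)  = putStrLn q >> fmap read getLine  -- read: sketch only
    run (Both a b)  = (,) <$> run a <*> run b

Here run (Both (AskText "Name?") (AskInt "Age?")) has type
IO (String, Int), so downstream analysis code cannot misinterpret the
collected fields.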
@Article{Winograd-Cort:2012:WIE,
author = "Daniel Winograd-Cort and Paul Hudak",
title = "Wormholes: introducing effects to {FRP}",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "91--104",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364519",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "Functional reactive programming (FRP) is a useful
model for programming real-time and reactive systems in
which one defines a signal function to process a stream
of input values into a stream of output values.
However, performing side effects (e.g. memory mutation
or input/output) in this model is tricky and typically
unsafe. In previous work, Winograd-Cort et al. [2012]
introduced resource types and wormholes to address this
problem. This paper better motivates, expands upon, and
formalizes the notion of a wormhole to fully unlock its
potential. We show, for example, that wormholes can be
used to define the concept of causality. This in turn
allows us to provide behaviors such as looping, a core
component of most languages, without building it
directly into the language. We also improve upon our
previous design by making wormholes less verbose and
easier to use. To formalize the notion of a wormhole,
we define an extension to the simply typed lambda
calculus, complete with typing rules and operational
semantics. In addition, we present a new form of
semantic transition that we call a temporal transition
to specify how an FRP program behaves over time and to
allow us to better reason about causality. As our model
is designed for a Haskell implementation, the semantics
are lazy. Finally, with the language defined, we prove
that our wormholes indeed allow side effects to be
performed safely in an FRP framework.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yorgey:2012:MTV,
author = "Brent A. Yorgey",
title = "{Monoids}: theme and variations (functional pearl)",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "105--116",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364520",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "The monoid is a humble algebraic structure, at first
glance even downright boring. However, there's much
more to monoids than meets the eye. Using examples
taken from the diagrams vector graphics framework as a
case study, I demonstrate the power and beauty of
monoids for library design. The paper begins with an
extremely simple model of diagrams and proceeds through
a series of incremental variations, all related somehow
to the central theme of monoids. Along the way, I
illustrate the power of compositional semantics; why
you should also pay attention to the monoid's even
humbler cousin, the semigroup; monoid homomorphisms;
and monoid actions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
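A taste of the theme: foldMap maps every element into a monoid and
combines the results with (<>), so swapping the monoid swaps the
analysis. A small Haskell example in the pearl's spirit (function
names illustrative):

    import Data.Monoid (Any (..), Sum (..))

    totalLen :: [String] -> Int
    totalLen = getSum . foldMap (Sum . length)

    anyEmpty :: [String] -> Bool
    anyEmpty = getAny . foldMap (Any . null)

The diagrams framework discussed in the pearl applies the same recipe
to the components of a diagram.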
@Article{Eisenberg:2012:DTP,
author = "Richard A. Eisenberg and Stephanie Weirich",
title = "Dependently typed programming with singletons",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "117--130",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "Haskell programmers have been experimenting with
dependent types for at least a decade, using clever
encodings that push the limits of the Haskell type
system. However, the cleverness of these encodings is
also their main drawback. Although the ideas are
inspired by dependently typed programs, the code looks
significantly different. As a result, GHC implementors
have responded with extensions to Haskell's type
system, such as GADTs, type families, and datatype
promotion. However, there remains a significant
difference between programming in Haskell and in
full-spectrum dependently typed languages. Haskell
enforces a phase separation between runtime values and
compile-time types. Therefore, singleton types are
necessary to express the dependency between values and
types. These singleton types introduce overhead and
redundancy for the programmer. This paper presents the
singletons library, which generates the boilerplate
code necessary for dependently typed programming using
GHC. To compare with full-spectrum languages, we
present an extended example based on an Agda interface
for safe database access. The paper concludes with a
detailed discussion on the current capabilities of GHC
for dependently typed programming and suggestions for
future extensions to better support this style of
programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
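The boilerplate that the singletons library generates follows a
standard hand-written pattern, sketched here: a GADT of runtime
witnesses mirrors a promoted datatype, letting a value-level argument
fix a type-level index (names illustrative):

    {-# LANGUAGE DataKinds, GADTs, KindSignatures #-}

    data Nat = Z | S Nat            -- promoted to the type level

    data SNat (n :: Nat) where      -- the singleton: one value per type
      SZ :: SNat 'Z
      SS :: SNat n -> SNat ('S n)

    data Vec (n :: Nat) a where
      VNil  :: Vec 'Z a
      VCons :: a -> Vec n a -> Vec ('S n) a

    -- The runtime witness determines the length in the result type.
    replicateV :: SNat n -> a -> Vec n a
    replicateV SZ     _ = VNil
    replicateV (SS n) x = VCons x (replicateV n x)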
@Article{Swierstra:2012:XCE,
author = "Wouter Swierstra",
title = "{{\tt xmonad}} in {Coq} (experience report):
programming a window manager in a proof assistant",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "131--136",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364523",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "This report documents the insights gained from
implementing the core functionality of xmonad, a
popular window manager written in Haskell, in the Coq
proof assistant. Rather than focus on verification,
this report outlines the technical challenges involved
with incorporating Coq code in a Haskell project.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Terei:2012:SH,
author = "David Terei and Simon Marlow and Simon Peyton Jones
and David Mazi{\`e}res",
title = "{Safe Haskell}",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "137--148",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364524",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "Though Haskell is predominantly type-safe,
implementations contain a few loopholes through which
code can bypass typing and module encapsulation. This
paper presents Safe Haskell, a language extension that
closes these loopholes. Safe Haskell makes it possible
to confine and safely execute untrusted, possibly
malicious code. By strictly enforcing types, Safe
Haskell allows a variety of different policies from API
sandboxing to information-flow control to be
implemented easily as monads. Safe Haskell aims to be
as unobtrusive as possible. It enforces properties
that programmers tend to meet already by convention. We
describe the design of Safe Haskell and an
implementation (currently shipping with GHC) that
infers safety for code that lies in a safe subset of
the language. We use Safe Haskell to implement an
online Haskell interpreter that can securely execute
arbitrary untrusted code with no overhead. The use of
Safe Haskell greatly simplifies this task and allows
the use of a large body of existing code and tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
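Using the extension is a one-pragma affair. A minimal module compiled
under Safe Haskell (module and function names are hypothetical); GHC
refuses to compile it if any import lies outside the safe subset:

    {-# LANGUAGE Safe #-}
    module Plugin (shout) where

    import Data.Char (toUpper)   -- fine: a safe module
    -- import System.IO.Unsafe   -- would be rejected under Safe

    shout :: String -> String
    shout = map toUpper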
@Article{Erdweg:2012:LSL,
author = "Sebastian Erdweg and Felix Rieger and Tillmann Rendel
and Klaus Ostermann",
title = "Layout-sensitive language extensibility with
{SugarHaskell}",
journal = j-SIGPLAN,
volume = "47",
number = "12",
pages = "149--160",
month = dec,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2430532.2364526",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jan 18 18:22:13 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '12 conference proceedings.",
abstract = "Programmers need convenient syntax to write elegant
and concise programs. Consequently, the Haskell
standard provides syntactic sugar for some scenarios
(e.g., do notation for monadic code), authors of
Haskell compilers provide syntactic sugar for more
scenarios (e.g., arrow notation in GHC), and some
Haskell programmers implement preprocessors for their
individual needs (e.g., idiom brackets in SHE). But
manually written preprocessors cannot scale: They are
expensive, error-prone, and not composable. Most
researchers and programmers therefore refrain from
using the syntactic notations they need in actual
Haskell programs, but only use them in documentation or
papers. We present a syntactically extensible version
of Haskell, SugarHaskell, that empowers ordinary
programmers to implement and use custom syntactic
sugar. Building on our previous work on syntactic
extensibility for Java, SugarHaskell integrates
syntactic extensions as sugar libraries into Haskell's
module system. Syntax extensions in SugarHaskell can
declare arbitrary context-free and layout-sensitive
syntax. SugarHaskell modules are compiled into Haskell
modules and further processed by a Haskell compiler. We
provide an Eclipse-based IDE for SugarHaskell that is
extensible, too, and automatically provides syntax
coloring for all syntax extensions imported into a
module. We have validated SugarHaskell with several
case studies, including arrow notation (as implemented
in GHC) and EBNF as a concise syntax for the
declaration of algebraic data types with associated
concrete syntax. EBNF declarations also show how to
extend the extension mechanism itself: They introduce
syntactic sugar for using the declared concrete syntax
in other SugarHaskell modules.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gonthier:2013:EMO,
author = "Georges Gonthier",
title = "Engineering mathematics: the {Odd Order Theorem} proof",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "1--2",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429071",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Even with the assistance of computer tools, the
formalized description and verification of
research-level mathematics remains a daunting task, not
least because of the talent with which mathematicians
combine diverse theories to achieve their ends. By
combining tools and techniques from type theory,
language design, and software engineering we have
managed to capture enough of these practices to
formalize the proof of the Odd Order theorem, a
landmark result in Group Theory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Losch:2013:FAN,
author = "Steffen L{\"o}sch and Andrew M. Pitts",
title = "Full abstraction for nominal {Scott} domains",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "3--14",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429073",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We develop a domain theory within nominal sets and
present programming language constructs and results
that can be gained from this approach. The development
is based on the concept of orbit-finite subset, that
is, a subset of a nominal set that is both finitely
supported and contained in finitely many orbits. This
concept appears prominently in the recent research
programme of Boja{\'n}czyk et al. on automata over
infinite alphabets, and our results establish a connection
between their work and a characterisation of
topological compactness discovered, in a quite
different setting, by Winskel and Turner as part of a
nominal domain theory for concurrency. We use this
connection to derive a notion of Scott domain within
nominal sets. The functionals for existential
quantification over names and `definite description'
over names turn out to be compact in the sense
appropriate for nominal Scott domains. Adding them,
together with parallel-or, to a programming language
for recursively defined higher-order functions with
name abstraction and locally scoped names, we prove a
full abstraction result for nominal Scott domains
analogous to Plotkin's classic result about PCF and
conventional Scott domains: two program phrases have
the same observable operational behaviour in all
contexts if and only if they denote equal elements of
the nominal Scott domain model. This is the first full
abstraction result we know of for higher-order
functions with local names that uses a domain theory
based on ordinary extensional functions, rather than
using the more intensional approach of game
semantics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
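A concrete instance of orbit-finiteness (our example, not the paper's):
over the set of atoms $\mathbb{A}$, the set
\[
  \{\, (a,b) \in \mathbb{A}^2 \mid a \neq b \,\}
\]
is infinite yet forms a single orbit, since any pair of distinct atoms
is carried to any other by a finite permutation of $\mathbb{A}$; it is
therefore orbit-finite, and it is supported by the empty set of atoms.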
@Article{Tate:2013:SSP,
author = "Ross Tate",
title = "The sequential semantics of producer effect systems",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "15--26",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429074",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Effects are fundamental to programming languages. Even
the lambda calculus has effects, and consequently the
two famous evaluation strategies produce different
semantics. As such, much research has been done to
improve our understanding of effects. Since Moggi
introduced monads for his computational lambda
calculus, further generalizations have been designed to
formalize increasingly complex computational effects,
such as indexed monads followed by layered monads
followed by parameterized monads. This succession
prompted us to determine the most general formalization
possible. In searching for this formalization we came
across many surprises, such as the insufficiencies of
arrows, as well as many unexpected insights, such as
the importance of considering an effect as a small
component of a whole system rather than just an
isolated feature. In this paper we present our semantic
formalization for producer effect systems, which we
call a productor, and prove its maximal generality by
focusing on only sequential composition of effectful
computations, consequently guaranteeing that the
existing monadic techniques are specializations of
productors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Abel:2013:CPI,
author = "Andreas Abel and Brigitte Pientka and David Thibodeau
and Anton Setzer",
title = "{Copatterns}: programming infinite structures by
observations",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "27--38",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429075",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Inductive datatypes provide mechanisms to define
finite data such as finite lists and trees via
constructors and allow programmers to analyze and
manipulate finite data via pattern matching. In this
paper, we develop a dual approach for working with
infinite data structures such as streams. Infinite data
inhabits coinductive datatypes which denote greatest
fixpoints. Unlike finite data, which is defined by
constructors, we define infinite data by observations.
Dual to pattern matching, a tool for analyzing finite
data, we develop the concept of copattern matching,
which allows us to synthesize infinite data. This leads
to a symmetric language design where pattern matching
on finite and infinite data can be mixed. We present a
core language for programming with infinite structures
by observations together with its operational semantics
based on (co)pattern matching and describe coverage of
copatterns. Our language naturally supports both
call-by-name and call-by-value interpretations and can
be seamlessly integrated into existing languages like
Haskell and ML. We prove type soundness for our
language and sketch how copatterns open new directions
for solving problems in the interaction of coinductive
and dependent types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
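Haskell lacks copatterns proper, but the observational style the
abstract describes can be approximated with a record of observations;
a minimal sketch (names ours, not the paper's syntax):

  -- A stream is whatever answers the two observations shead and stail.
  data Stream a = Stream { shead :: a, stail :: Stream a }

  -- The natural numbers, defined by what each observation returns.
  nats :: Stream Integer
  nats = go 0
    where go n = Stream { shead = n, stail = go (n + 1) }

Laziness makes this productive: each observation is computed only when
demanded, mirroring the greatest-fixpoint reading of the type.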
@Article{Blelloch:2013:CEF,
author = "Guy E. Blelloch and Robert Harber",
title = "Cache and {I/O} efficient functional algorithms",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "39--50",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429077",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The widely studied I/O and ideal-cache models were
developed to account for the large difference in costs
to access memory at different levels of the memory
hierarchy. Both models are based on a two-level memory
hierarchy with a fixed-size primary memory (cache) of
size {$M$} and an unbounded secondary memory organized
in blocks of size {$B$}. The cost measure is based purely
on the number of block transfers between the primary
and secondary memory. All other operations are free.
Many algorithms have been analyzed in these models and
indeed these models predict the relative performance of
algorithms much more accurately than the standard RAM
model. The models, however, require specifying
algorithms at a very low level requiring the user to
carefully lay out their data in arrays in memory and
manage their own memory allocation. In this paper we
present a cost model for analyzing the memory
efficiency of algorithms expressed in a simple
functional language. We show how some algorithms
written in standard forms using just lists and trees
(no arrays) and requiring no explicit memory layout or
memory management are efficient in the model. We then
describe an implementation of the language and show
provable bounds for mapping the cost in our model to
the cost in the ideal-cache model. These bounds imply
that purely functional programs based on lists and
trees with no special attention to any details of
memory layout can be asymptotically as efficient as
carefully designed imperative I/O-efficient
algorithms. For example, we describe an {$ O((n/B)
\log_{M/B} (n/B)) $} cost sorting algorithm, which is
optimal in the ideal-cache and I/O models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
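The ``standard forms using just lists and trees'' that the cost model
analyzes include textbook list algorithms such as merge sort; a sketch
of the kind of program meant (ours, not the paper's code):

  msort :: Ord a => [a] -> [a]
  msort []  = []
  msort [x] = [x]
  msort xs  = merge (msort l) (msort r)
    where
      (l, r) = splitAt (length xs `div` 2) xs
      merge [] ys = ys
      merge ys [] = ys
      merge (a:as) (b:bs)
        | a <= b    = a : merge as (b:bs)
        | otherwise = b : merge (a:as) bs

No arrays, no explicit layout or allocation: the paper's point is that
a cost model can still give such code provable cache bounds.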
@Article{Ben-Amram:2013:LRP,
author = "Amir M. Ben-Amram and Samir Genaim",
title = "On the linear ranking problem for integer
linear-constraint loops",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "51--62",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429078",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper we study the complexity of the Linear
Ranking problem: given a loop, described by linear
constraints over a finite set of integer variables, is
there a linear ranking function for this loop? While
existence of such a function implies termination, this
problem is not equivalent to termination. When the
variables range over the rationals or reals, the Linear
Ranking problem is known to be PTIME decidable.
However, when they range over the integers, whether for
single-path or multipath loops, the complexity of the
Linear Ranking problem has not yet been determined. We
show that it is coNP-complete. However, we point out
some important special cases of PTIME complexity.
We also present complete algorithms for synthesizing
linear ranking functions, both for the general case and
the special PTIME cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
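For a single-path integer loop, a linear ranking function is an affine
expression that is bounded below on the guard and strictly decreases on
every transition; a small worked example (ours):
\[
  \textbf{while}\ (x - y \geq 1)\ \{\ x' = x,\ y' = y + 1\ \}
  \qquad f(x,y) = x - y .
\]
Here $f \geq 1$ whenever the guard holds and $f' = f - 1$ on every
iteration, so $f$ is a linear ranking function and the loop terminates.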
@Article{Mayr:2013:AAM,
author = "Richard Mayr and Lorenzo Clemente",
title = "Advanced automata minimization",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "63--74",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429079",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an efficient algorithm to reduce the size
of nondeterministic B{\"u}chi word automata, while
retaining their language. Additionally, we describe
methods to solve PSPACE-complete automata problems like
universality, equivalence and inclusion for much larger
instances (1-3 orders of magnitude) than before. This
can be used to scale up applications of automata in
formal verification tools and decision procedures for
logical theories. The algorithm is based on new
transition pruning techniques. These use criteria based
on combinations of backward and forward trace
inclusions. Since these relations are themselves
PSPACE-complete, we describe methods to compute good
approximations of them in polynomial time. Extensive
experiments show that the average-case complexity of
our algorithm scales quadratically. The size reduction
of the automata depends very much on the class of
instances, but our algorithm consistently outperforms
all previous techniques by a wide margin. We tested our
algorithm on B{\"u}chi automata derived from LTL formulae,
many classes of random automata and automata derived
from mutual exclusion protocols, and compared its
performance to the well-known automata tool GOAL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Unno:2013:ARC,
author = "Hiroshi Unno and Tachio Terauchi and Naoki Kobayashi",
title = "Automating relatively complete verification of
higher-order functional programs",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "75--86",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429081",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an automated approach to relatively
completely verifying safety (i.e., reachability)
property of higher-order functional programs. Our
contribution is two-fold. First, we extend the
refinement type system framework employed in the recent
work on (incomplete) automated higher-order
verification by drawing on the classical work on
relatively complete ``Hoare logic like'' program logic
for higher-order procedural languages. Then, by
adopting the recently proposed techniques for solving
constraints over quantified first-order logic formulas,
we develop an automated type inference method for the
type system, thereby realizing an automated relatively
complete verification of higher-order programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
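In the refinement type systems the abstract builds on, base types are
annotated with predicates over program values; a standard toy instance
(ours, not from the paper):
\[
  \mathit{inc} : (x : \{\nu : \mathsf{int} \mid \nu \geq 0\})
  \rightarrow \{\nu : \mathsf{int} \mid \nu > x\},
  \qquad \mathit{inc}\ x = x + 1 .
\]
Relative completeness then hinges on the predicate language being rich
enough to express the needed invariants, with inference discharged by a
solver for constraints over quantified first-order formulas.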
@Article{Atkey:2013:AIA,
author = "Robert Atkey and Patricia Johann and Andrew Kennedy",
title = "Abstraction and invariance for algebraically indexed
types",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "87--100",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429082",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reynolds' relational parametricity provides a powerful
way to reason about programs in terms of invariance
under changes of data representation. A dazzling array
of applications of Reynolds' theory exists, exploiting
invariance to yield ``free theorems'', non-inhabitation
results, and encodings of algebraic datatypes. Outside
computer science, invariance is a common theme running
through many areas of mathematics and physics. For
example, the area of a triangle is unaltered by
rotation or flipping. If we scale a triangle, then we
scale its area, maintaining an invariant relationship
between the two. The transformations under which
properties are invariant are often organised into
groups, with the algebraic structure reflecting the
composability and invertibility of transformations. In
this paper, we investigate programming languages whose
types are indexed by algebraic structures such as
groups of geometric transformations. Other examples
include types indexed by principals--for information
flow security--and types indexed by distances--for
analysis of analytic uniform continuity properties.
Following Reynolds, we prove a general Abstraction
Theorem that covers all these instances. Consequences
of our Abstraction Theorem include free theorems
expressing invariance properties of programs, type
isomorphisms based on invariance properties, and
non-definability results indicating when certain
algebraically indexed types are uninhabited or only
inhabited by trivial programs. We have fully formalised
our framework and most examples in Coq.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Benzaken:2013:SDS,
author = "V{\'e}ronique Benzaken and Giuseppe Castagna and Kim
Nguyen and J{\'e}r{\^o}me Sim{\'e}on",
title = "Static and dynamic semantics of {NoSQL} languages",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "101--114",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429083",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a calculus for processing semistructured
data that spans differences of application area among
several novel query languages, broadly categorized as
``NoSQL''. This calculus lets users define their own
operators, capturing a wider range of data processing
capabilities, whilst providing a typing precision so
far typical only of primitive hard-coded operators. The
type inference algorithm is based on semantic type
checking, resulting in type information that is both
precise, and flexible enough to handle structured and
semistructured data. We illustrate the use of this
calculus by encoding a large fragment of Jaql,
including operations and iterators over JSON, embedded
SQL expressions, and co-grouping, and show how the
encoding directly yields a typing discipline for Jaql
as it is, namely without the addition of any type
definition or type annotation in the code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Cerny:2013:QAR,
author = "Pavol Cerny and Thomas A. Henzinger and Arjun
Radhakrishna",
title = "Quantitative abstraction refinement",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "115--128",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429085",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a general framework for abstraction with
respect to quantitative properties, such as worst-case
execution time, or power consumption. Our framework
provides a systematic way for counter-example guided
abstraction refinement for quantitative properties. The
salient aspect of the framework is that it allows
anytime verification, that is, verification algorithms
that can be stopped at any time (for example, due to
exhaustion of memory), and report approximations that
improve monotonically when the algorithms are given
more time. We instantiate the framework with a number
of quantitative abstractions and refinement schemes,
which differ in terms of how much quantitative
information they keep from the original system. We
introduce both state-based and trace-based quantitative
abstractions, and we describe conditions that define
classes of quantitative properties for which the
abstractions provide over-approximations. We give
algorithms for evaluating the quantitative properties
on the abstract systems. We present algorithms for
counter-example based refinements for quantitative
properties for both state-based and segment-based
abstractions. We perform a case study on worst-case
execution time of executables to evaluate the anytime
verification aspect and the quantitative abstractions
we proposed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Farzan:2013:IDF,
author = "Azadeh Farzan and Zachary Kincaid and Andreas
Podelski",
title = "Inductive data flow graphs",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "129--142",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429086",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The correctness of a sequential program can be shown
by the annotation of its control flow graph with
inductive assertions. We propose inductive data flow
graphs, data flow graphs with incorporated inductive
assertions, as the basis of an approach to verifying
concurrent programs. An inductive data flow graph
accounts for a set of dependencies between program
actions in interleaved thread executions, and therefore
stands as a representation for the set of concurrent
program traces which give rise to these dependencies.
The approach first constructs an inductive data flow
graph and then checks whether all program traces are
represented. The size of the inductive data flow graph
is polynomial in the number of data dependencies (in a
sense that can be made formal); it does not grow
exponentially in the number of threads unless the data
dependencies do. The approach shifts the burden of the
exponential explosion towards the check whether all
program traces are represented, i.e., to a
combinatorial problem (over finite graphs).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{DSilva:2013:ACD,
author = "Vijay D'Silva and Leopold Haller and Daniel Kroening",
title = "Abstract conflict driven learning",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "143--154",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429087",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern satisfiability solvers implement an algorithm,
called Conflict Driven Clause Learning, which combines
search for a model with analysis of conflicts. We show
that this algorithm can be generalised to solve the
lattice-theoretic problem of determining if an additive
transformer on a Boolean lattice is always bottom. Our
generalised procedure combines overapproximations of
greatest fixed points with underapproximations of least
fixed points to obtain more precise results than
computing fixed points in isolation. We generalise
implication graphs used in satisfiability solvers to
derive underapproximate transformers from
overapproximate ones. Our generalisation provides a new
method for static analysers that operate over
non-distributive lattices to reason about properties
that require disjunction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Goyet:2013:LLB,
author = "Alexis Goyet",
title = "The {Lambda Lambda-Bar} calculus: a dual calculus for
unconstrained strategies",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "155--166",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429089",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a calculus which combines a simple,
CCS-like representation of finite behaviors, with two
dual binders $ \lambda $ and $ \bar {\lambda } $.
Infinite behaviors are obtained through a syntactical
fixed-point operator, which is used to give a
translation of $ \lambda $-terms. The duality of the
calculus makes the roles of a function and its
environment symmetrical. As usual, the environment is
allowed to call a function at any given point, each
time with a different argument. Dually, the function is
allowed to answer any given call, each time with a
different behavior. This grants terms in our language
the power of functional references. The inspiration for
this language comes from game semantics. Indeed, its
normal forms give a simple concrete syntax for finite
strategies, which are inherently non-innocent. This
very direct correspondence allows us to describe, in
syntactical terms, a number of features from game
semantics. The fixed-point expansion of translated $
\lambda $-terms corresponds to the generation of
infinite plays from the finite views of an innocent
strategy. The syntactical duality between terms and
co-terms corresponds to the duality between Player and
Opponent. This duality also gives rise to a
B{\"o}hm-out lemma. The paper is divided into two
parts. The first one is purely syntactical, and
requires no background in game semantics. The second
describes the fully abstract game model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{DalLago:2013:GT,
author = "Ugo {Dal Lago} and Barbara Petit",
title = "The geometry of types",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "167--178",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429090",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We show that time complexity analysis of higher-order
functional programs can be effectively reduced to an
arguably simpler (although computationally equivalent)
verification problem, namely checking first-order
inequalities for validity. This is done by giving an
efficient inference algorithm for linear dependent
types which, given a PCF term, produces as output both
a linear dependent type and a cost expression for the
term, together with a set of proof obligations.
Actually, the output type judgement is derivable iff
all proof obligations are valid. This, coupled with the
already known relative completeness of linear dependent
types, ensures that no information is lost, i.e., that
there are no false positives or negatives. Moreover,
the procedure reflects the difficulty of the original
problem: simple PCF terms give rise to sets of proof
obligations which are easy to solve. The latter can
then be put in a format suitable for automatic or
semi-automatic verification by external solvers.
Ongoing experimental evaluation has produced
encouraging results, which are briefly presented in the
paper.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Staton:2013:UPI,
author = "Sam Staton and Paul Blain Levy",
title = "Universal properties of impure programming languages",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "179--192",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429091",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We investigate impure, call-by-value programming
languages. Our first language only has variables and
let-binding. Its equational theory is a variant of
Lambek's theory of multicategories that omits the
commutativity axiom. We demonstrate that type
constructions for impure languages --- products, sums
and functions --- can be characterized by universal
properties in the setting of `premulticategories',
multicategories where the commutativity law may fail.
This leads us to new, universal characterizations of
two earlier equational theories of impure programming
languages: the premonoidal categories of Power and
Robinson, and the monad-based models of Moggi. Our
analysis thus puts these earlier abstract ideas on a
canonical foundation, bringing them to a new, syntactic
level.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Hur:2013:PPC,
author = "Chung-Kil Hur and Georg Neis and Derek Dreyer and
Viktor Vafeiadis",
title = "The power of parameterization in coinductive proof",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "193--206",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429093",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Coinduction is one of the most basic concepts in
computer science. It is therefore surprising that the
commonly-known lattice-theoretic accounts of the
principles underlying coinductive proofs are lacking in
two key respects: they do not support compositional
reasoning (i.e. breaking proofs into separate pieces
that can be developed in isolation), and they do not
support incremental reasoning (i.e. developing proofs
interactively by starting from the goal and
generalizing the coinduction hypothesis repeatedly as
necessary). In this paper, we show how to support
coinductive proofs that are both compositional and
incremental, using a dead simple construction we call
the parameterized greatest fixed point. The basic idea
is to parameterize the greatest fixed point of interest
over the accumulated knowledge of ``the proof so far''.
While this idea has been proposed before, by Winskel in
1989 and by Moss in 2001, neither of the previous
accounts suggests its general applicability to
improving the state of the art in interactive
coinductive proof. In addition to presenting the
lattice-theoretic foundations of parameterized
coinduction, demonstrating its utility on
representative examples, and studying its composition
with ``up-to'' techniques, we also explore its
mechanization in proof assistants like Coq and
Isabelle. Unlike traditional approaches to mechanizing
coinduction (e.g. Coq's cofix), which employ syntactic
``guardedness checking'', parameterized coinduction
offers a semantic account of guardedness. This leads to
faster and more robust proof development, as we
demonstrate using our new Coq library, Paco.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
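As we read the abstract, the construction parameterizes the greatest
fixed point of a monotone $f$ on a complete lattice by the knowledge
accumulated so far (sketch, ours):
\[
  G_f(r) = \nu x.\, f(r \vee x), \qquad \nu f = G_f(\bot),
  \qquad y \leq G_f(r) \iff y \leq G_f(r \vee y),
\]
where the last equivalence is the accumulation rule that lets a proof
bank its current goal $y$ before continuing, which is what makes
coinductive proofs incremental and compositional.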
@Article{Delaware:2013:MTC,
author = "Benjamin Delaware and Bruno C. d. S. Oliveira and Tom
Schrijvers",
title = "Meta-theory {\`a} la carte",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "207--218",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429094",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Formalizing meta-theory, or proofs about programming
languages, in a proof assistant has many well-known
benefits. Unfortunately, the considerable effort
involved in mechanizing proofs has prevented it from
becoming standard practice. This cost can be amortized
by reusing as much of existing mechanized
formalizations as possible when building a new language
or extending an existing one. One important challenge
in achieving reuse is that the inductive definitions
and proofs used in these formalizations are closed to
extension. This forces language designers to cut and
paste existing definitions and proofs in an ad-hoc
manner and to expend considerable effort to patch up
the results. The key contribution of this paper is the
development of an induction technique for extensible
Church encodings using a novel reinterpretation of the
universal property of folds. These encodings provide
the foundation for a framework, formalized in Coq,
which uses type classes to automate the composition of
proofs from modular components. This framework enables
a more structured approach to the reuse of meta-theory
formalizations through the composition of modular
inductive definitions and proofs. Several interesting
language features, including binders and general
recursion, illustrate the capabilities of our
framework. We reuse these features to build fully
mechanized definitions and proofs for a number of
languages, including a version of mini-ML. Bounded
induction enables proofs of properties for
non-inductive semantic functions, and mediating type
classes enable proof adaptation for more feature-rich
languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
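The extensible Church encodings in question descend from the ``data
types à la carte'' style of composing signature functors; a minimal
Haskell rendering of the underlying machinery (ours; the paper itself
works in Coq):

  {-# LANGUAGE TypeOperators #-}
  -- Fixed point of a signature functor, and coproducts of signatures.
  newtype Fix f = In (f (Fix f))
  data (f :+: g) a = Inl (f a) | Inr (g a)

  -- The fold whose universal property drives the induction technique.
  fold :: Functor f => (f a -> a) -> Fix f -> a
  fold alg (In t) = alg (fmap (fold alg) t)

Language features become functors composed with (:+:), and semantic
functions become algebras composed and run through fold.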
@Article{Park:2013:TPB,
author = "Jonghyun Park and Jeongbong Seo and Sungwoo Park",
title = "A theorem prover for {Boolean} {BI}",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "219--232",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429095",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "While separation logic is acknowledged as an enabling
technology for large-scale program verification, most
of the existing verification tools use only a fragment
of separation logic that excludes separating
implication. As the first step towards a verification
tool using full separation logic, we develop a nested
sequent calculus for Boolean BI (Bunched Implications),
the underlying theory of separation logic, as well as a
theorem prover based on it. A salient feature of our
nested sequent calculus is that its sequent may have
not only smaller child sequents but also multiple
parent sequents, thus producing a graph structure of
sequents instead of a tree structure. Our theorem
prover is based on backward search in a refinement of
the nested sequent calculus in which weakening and
contraction are built into all the inference rules. We
explain the details of designing our theorem prover and
provide empirical evidence of its practicality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
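Separating implication ($\mathrel{-\!\!*}$), the connective most
verification tools omit, is the right adjoint of separating conjunction
($*$); the basic entailment a Boolean BI prover must handle is (example
ours):
\[
  P * (P \mathrel{-\!\!*} Q) \vdash Q ,
\]
read: a state that splits into a part satisfying $P$ and a part that,
when extended with any $P$-state, satisfies $Q$, itself satisfies $Q$.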
@Article{Krishnamurthi:2013:PPL,
author = "Shriram Krishnamurthi",
title = "From principles to programming languages (and back)",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "233--234",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429097",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Batty:2013:LAC,
author = "Mark Batty and Mike Dodds and Alexey Gotsman",
title = "Library abstraction for {C\slash C++} concurrency",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "235--248",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429099",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When constructing complex concurrent systems,
abstraction is vital: programmers should be able to
reason about concurrent libraries in terms of abstract
specifications that hide the implementation details.
Relaxed memory models present substantial challenges in
this respect, as libraries need not provide
sequentially consistent abstractions: to avoid
unnecessary synchronisation, they may allow clients to
observe relaxed memory effects, and library
specifications must capture these. In this paper, we
propose a criterion for sound library abstraction in
the new C11 and C++11 memory model, generalising the
standard sequentially consistent notion of
linearizability. We prove that our criterion soundly
captures all client-library interactions, both through
call and return values, and through the subtle
synchronisation effects arising from the memory model.
To illustrate our approach, we verify implementations
against specifications for the lock-free Treiber stack
and a producer-consumer queue. Ours is the first
approach to compositional reasoning for concurrent
C11/C++11 programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Ramalingam:2013:FTI,
author = "Ganesan Ramalingam and Kapil Vaswani",
title = "Fault tolerance via idempotence",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "249--262",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429100",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Building distributed services and applications is
challenging due to the pitfalls of distribution such as
process and communication failures. A natural solution
to these problems is to detect potential failures, and
retry the failed computation and/or resend messages.
Ensuring correctness in such an environment requires
distributed services and applications to be idempotent.
In this paper, we study the inter-related aspects of
process failures, duplicate messages, and idempotence.
We first introduce a simple core language (based on
lambda calculus) inspired by modern distributed
computing platforms. This language formalizes the
notions of a service, duplicate requests, process
failures, data partitioning, and local atomic
transactions that are restricted to a single store. We
then formalize a desired (generic) correctness
criterion for applications written in this language,
consisting of idempotence (which captures the desired
safety properties) and failure-freedom (which captures
the desired progress properties). We then propose
language support in the form of a monad that
automatically ensures failure-free idempotence. A key
characteristic of our implementation is that it is
decentralized and does not require distributed
coordination. We show that the language support can be
enriched with other useful constructs, such as
compensations, while retaining the coordination-free
decentralized nature of the implementation. We have
implemented the idempotence monad (and its variants) in
F\# and C\# and used our implementation to build
realistic applications on Windows Azure. We find that
the monad has low runtime overheads and leads to more
declarative applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
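At its core, idempotence here means that duplicate requests collapse to
a single effect; a toy in-memory sketch of such a dedup wrapper (ours;
the paper's monad instead makes the bookkeeping persistent and
decentralized):

  import Data.IORef (IORef, atomicModifyIORef')
  import qualified Data.Set as Set

  -- Run an action at most once per request id; duplicates are no-ops.
  once :: IORef (Set.Set Int) -> Int -> IO () -> IO ()
  once seen reqId action = do
    dup <- atomicModifyIORef' seen $ \s ->
             if Set.member reqId s
               then (s, True)
               else (Set.insert reqId s, False)
    if dup then pure () else action

  -- Note: marking before acting is not failure-safe; the paper couples
  -- effects and bookkeeping within local atomic transactions.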
@Article{Carbone:2013:DFD,
author = "Marco Carbone and Fabrizio Montesi",
title = "Deadlock-freedom-by-design: multiparty asynchronous
global programming",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "263--274",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429101",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the last decade, global descriptions have been
successfully employed for the verification and
implementation of communicating systems, respectively
as protocol specifications and choreographies. In this
work, we bring these two practices together by
proposing a purely-global programming model. We show a
novel interpretation of asynchrony and parallelism in a
global setting and develop a typing discipline that
verifies choreographies against protocol
specifications, based on multiparty sessions.
Exploiting the nature of global descriptions, our type
system defines a new class of deadlock-free concurrent
systems (deadlock-freedom-by-design), provides type
inference, and supports session mobility. We give a
notion of Endpoint Projection (EPP) which generates
correct entity code (as pi-calculus terms) from a
choreography. Finally, we evaluate our approach by
providing a prototype implementation for a concrete
programming language and by applying it to some
examples from multicore and service-oriented
programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Caires:2013:TDB,
author = "Lu{\'\i}s Caires and Jo{\~a}o C. Seco",
title = "The type discipline of behavioral separation",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "275--286",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429103",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce the concept of behavioral separation as a
general principle for disciplining interference in
higher-order imperative concurrent programs, and
present a type-based approach that systematically
develops the concept in the context of an ML-like
language extended with concurrency and synchronization
primitives. Behavioral separation builds on notions
originally introduced for behavioral type systems and
separation logics, but shifts the focus from the
separation of static program state properties towards
the separation of dynamic usage behaviors of runtime
values. Behavioral separation types specify how values
may be safely used by client code, and can enforce
fine-grained interference control disciplines while
preserving compositionality, information hiding, and
flexibility. We illustrate how our type system, even if
based on a small set of general primitives, is already
able to tackle fairly challenging program idioms,
involving aliasing at various types, concurrency with
first-class threads, manipulation of linked data
structures, behavioral borrowing, and invariant-based
separation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Dinsdale-Young:2013:VCR,
author = "Thomas Dinsdale-Young and Lars Birkedal and Philippa
Gardner and Matthew Parkinson and Hongseok Yang",
title = "{Views}: compositional reasoning for concurrent
programs",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "287--300",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429104",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compositional abstractions underly many reasoning
principles for concurrent programs: the concurrent
environment is abstracted in order to reason about a
thread in isolation; and these abstractions are
composed to reason about a program consisting of many
threads. For instance, separation logic uses formulae
that describe part of the state, abstracting the rest;
when two threads use disjoint state, their
specifications can be composed with the separating
conjunction. Type systems abstract the state to the
types of variables; threads may be composed when they
agree on the types of shared variables. In this paper,
we present the ``Concurrent Views Framework'', a
metatheory of concurrent reasoning principles. The
theory is parameterised by an abstraction of state with
a notion of composition, which we call views. The
metatheory is remarkably simple, but highly applicable:
the rely-guarantee method, concurrent separation logic,
concurrent abstract predicates, type systems for
recursive references and for unique pointers, and even
an adaptation of the Owicki-Gries method can all be
seen as instances of the Concurrent Views Framework.
Moreover, our metatheory proves each of these systems
is sound without requiring induction on the operational
semantics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Jensen:2013:HLS,
author = "Jonas B. Jensen and Nick Benton and Andrew Kennedy",
title = "High-level separation logic for low-level code",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "301--314",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429105",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Separation logic is a powerful tool for reasoning
about structured, imperative programs that manipulate
pointers. However, its application to unstructured,
lower-level languages such as assembly language or
machine code remains challenging. In this paper we
describe a separation logic tailored for this purpose
that we have applied to x86 machine-code programs. The
logic is built from an assertion logic on machine
states over which we construct a specification logic
that encapsulates uses of frames and step indexing. The
traditional notion of Hoare triple is not applicable
directly to unstructured machine code, where code and
data are mixed together and programs do not in general
run to completion, so instead we adopt a
continuation-passing style of specification with
preconditions alone. Nevertheless, the range of
primitives provided by the specification logic, which
include a higher-order frame connective, a novel
read-only frame connective, and a 'later' modality,
support the definition of derived forms to support
structured-programming-style reasoning for common
cases, in which standard rules for Hoare triples are
derived as lemmas. Furthermore, our encoding of scoped
assembly-language labels lets us give definitions and
proof rules for powerful assembly-language 'macros'
such as while loops, conditionals and procedures. We
have applied the framework to a model of sequential x86
machine code built entirely within the Coq proof
assistant, including tactic support based on
computational reflection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Myers:2013:HLC,
author = "Andrew C. Myers",
title = "How languages can save distributed computing",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "315--316",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429107",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Henzinger:2013:QRC,
author = "Thomas A. Henzinger and Christoph M. Kirsch and Hannes
Payer and Ali Sezgin and Ana Sokolova",
title = "Quantitative relaxation of concurrent data
structures",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "317--328",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429109",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There is a trade-off between performance and
correctness in implementing concurrent data structures.
Better performance may be achieved at the expense of
relaxing correctness, by redefining the semantics of
data structures. We address such a redefinition of data
structure semantics and present a systematic and formal
framework for obtaining new data structures by
quantitatively relaxing existing ones. We view a data
structure as a sequential specification S containing
all ``legal'' sequences over an alphabet of method
calls. Relaxing the data structure corresponds to
defining a distance from any sequence over the alphabet
to the sequential specification: the k-relaxed
sequential specification contains all sequences over
the alphabet within distance k from the original
specification. In contrast to other existing work, our
relaxations are semantic (distance in terms of data
structure states). As an instantiation of our
framework, we present two simple yet generic relaxation
schemes, called out-of-order and stuttering relaxation,
along with several ways of computing distances. We show
that the out-of-order relaxation, when further
instantiated to stacks, queues, and priority queues,
amounts to tolerating bounded out-of-order behavior,
which cannot be captured by a purely syntactic
relaxation (distance in terms of sequence manipulation,
e.g. edit distance). We give concurrent implementations
of relaxed data structures and demonstrate that bounded
relaxations provide the means for trading correctness
for performance in a controlled way. The relaxations
are monotonic, which further highlights the trade-off:
increasing k increases the number of permitted
sequences, which as we demonstrate can lead to better
performance. Finally, since a relaxed stack or queue
also implements a pool, we actually have new concurrent
pool implementations that outperform the
state-of-the-art ones.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
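To make the $k$-relaxation concrete (example ours): for a FIFO queue
under the out-of-order relaxation with $k = 1$, a dequeue may return
either of the two oldest elements, so the history
\[
  \mathit{enq}(1)\ \mathit{enq}(2)\ \mathit{deq}{\Rightarrow}2\
  \mathit{deq}{\Rightarrow}1
\]
lies within distance $1$ of the sequential specification and is
admitted, buying implementations extra freedom to avoid
synchronisation.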
@Article{Demange:2013:PBB,
author = "Delphine Demange and Vincent Laporte and Lei Zhao and
Suresh Jagannathan and David Pichardie and Jan Vitek",
title = "{Plan B}: a buffered memory model for {Java}",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "329--342",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429110",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent advances in verification have made it possible
to envision trusted implementations of real-world
languages. Java with its type-safety and fully
specified semantics would appear to be an ideal
candidate; yet, the complexity of the translation steps
used in production virtual machines has made it a
challenging target for verifying compiler technology.
One of Java's key innovations, its memory model, poses
significant obstacles to such an endeavor. The Java
Memory Model is an ambitious attempt at specifying the
behavior of multithreaded programs in a portable,
hardware agnostic, way. While experts have an intuitive
grasp of the properties that the model should enjoy,
the specification is complex and not well-suited for
integration within a verifying compiler infrastructure.
Moreover, the specification is given in an axiomatic
style that is distant from the intuitive
reordering-based reasoning traditionally used to
justify or rule out behaviors, and ill-suited to the
kind of operational reasoning one would expect to
employ in a compiler. This paper takes a step back, and
introduces a Buffered Memory Model (BMM) for Java. We
choose a pragmatic point in the design space
sacrificing generality in favor of a model that is
fully characterized in terms of the reorderings it
allows, amenable to formal reasoning, and which can be
efficiently applied to a specific hardware family,
namely x86 multiprocessors. Although the BMM restricts
the reorderings compilers are allowed to perform, it
serves as the key enabling device to achieving a
verification pathway from bytecode to machine
instructions. Despite its restrictions, we show that it
is backwards compatible with the Java Memory Model and
that it does not cripple performance on TSO
architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Turon:2013:LRF,
author = "Aaron J. Turon and Jacob Thamsborg and Amal Ahmed and
Lars Birkedal and Derek Dreyer",
title = "Logical relations for fine-grained concurrency",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "343--356",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429111",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Fine-grained concurrent data structures (or FCDs)
reduce the granularity of critical sections in both
time and space, thus making it possible for clients to
access different parts of a mutable data structure in
parallel. However, the tradeoff is that the
implementations of FCDs are very subtle and tricky to
reason about directly. Consequently, they are carefully
designed to be contextual refinements of their
coarse-grained counterparts, meaning that their clients
can reason about them as if all access to them were
sequentialized. In this paper, we propose a new
semantic model, based on Kripke logical relations, that
supports direct proofs of contextual refinement in the
setting of a type-safe high-level language. The key
idea behind our model is to provide a simple way of
expressing the ``local life stories'' of individual
pieces of an FCD's hidden state by means of protocols
that the threads concurrently accessing that state must
follow. By endowing these protocols with a simple yet
powerful transition structure, as well as the ability
to assert invariants on both heap states and
specification code, we are able to support clean and
intuitive refinement proofs for the most sophisticated
types of FCDs, such as conditional compare-and-set
(CCAS).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Gaboardi:2013:LDT,
author = "Marco Gaboardi and Andreas Haeberlen and Justin Hsu
and Arjun Narayan and Benjamin C. Pierce",
title = "Linear dependent types for differential privacy",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "357--370",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429113",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Differential privacy offers a way to answer queries
about sensitive information while providing strong,
provable privacy guarantees, ensuring that the presence
or absence of a single individual in the database has a
negligible statistical effect on the query's result.
Proving that a given query has this property involves
establishing a bound on the query's sensitivity---how
much its result can change when a single record is
added or removed. A variety of tools have been
developed for certifying that a given query is
differentially private. In one approach, Reed and
Pierce [34] proposed a functional programming language,
Fuzz, for writing differentially private queries. Fuzz
uses linear types to track sensitivity and a
probability monad to express randomized computation; it
guarantees that any program with a certain type is
differentially private. Fuzz can successfully verify
many useful queries. However, it fails when the
sensitivity analysis depends on values that are not
known statically. We present DFuzz, an extension of
Fuzz with a combination of linear indexed types and
lightweight dependent types. This combination allows a
richer sensitivity analysis that is able to certify a
larger class of queries as differentially private,
including ones whose sensitivity depends on runtime
information. As in Fuzz, the differential privacy
guarantee follows directly from the soundness theorem
of the type system. We demonstrate the enhanced
expressivity of DFuzz by certifying differential
privacy for a broad class of iterative algorithms that
could not be typed previously.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
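The sensitivity bound that Fuzz-style type systems certify statically can be illustrated dynamically. A minimal Python sketch, not taken from the Fuzz or DFuzz implementations (all names here are hypothetical): a counting query changes by at most 1 when a record is added or removed, so Laplace noise with scale 1/epsilon yields an epsilon-differentially-private release.

import random

def laplace(scale):
    # The difference of two iid exponentials is Laplace(0, scale).
    return random.expovariate(1.0 / scale) - random.expovariate(1.0 / scale)

def private_count(records, predicate, epsilon):
    # A counting query has sensitivity 1, so Laplace noise with scale
    # 1/epsilon gives epsilon-differential privacy.  This is the bound
    # a Fuzz-style linear type would certify at compile time.
    true_count = sum(1 for r in records if predicate(r))
    return true_count + laplace(1.0 / epsilon)

data = [3, 9, 12, 20, 7]
print(private_count(data, lambda r: r > 8, epsilon=0.5))

A DFuzz-style analysis would additionally let the certified sensitivity, and hence the noise scale, depend on runtime values such as the number of iterations of a loop.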
@Article{Fournet:2013:FAC,
author = "Cedric Fournet and Nikhil Swamy and Juan Chen and
Pierre-Evariste Dagand and Pierre-Yves Strub and
Benjamin Livshits",
title = "Fully abstract compilation to {JavaScript}",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "371--384",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429114",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many tools allow programmers to develop applications
in high-level languages and deploy them in web browsers
via compilation to JavaScript. While practical and
widely used, these compilers are ad hoc: no guarantee
is provided on their correctness for whole programs,
nor on their security for programs executed within
arbitrary JavaScript contexts. This paper presents a
compiler with such guarantees. We compile an ML-like
language with higher-order functions and references to
JavaScript, while preserving all source program
properties. Relying on type-based invariants and
applicative bisimilarity, we show full abstraction: two
programs are equivalent in all source contexts if and
only if their wrapped translations are equivalent in
all JavaScript contexts. We evaluate our compiler on
sample programs, including a series of secure
libraries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
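The defensive-wrapper idea underlying such full-abstraction results can be sketched independently of the paper's actual ML-to-JavaScript translation. A toy Python illustration (hypothetical names, not the paper's compiler): exported entry points check every value crossing the boundary, so an arbitrary context cannot forge inputs that no well-typed source context could produce.

def export_counter():
    # Hidden state: no direct reference to it ever escapes.
    state = {"n": 0}

    def incr(step):
        # Boundary check: refuse values that no well-typed source
        # context could have produced (bool is excluded because it
        # subclasses int in Python).
        if not isinstance(step, int) or isinstance(step, bool):
            raise TypeError("incr expects an int")
        state["n"] += step
        return state["n"]

    # Only checked entry points cross the boundary.
    return {"incr": incr}

counter = export_counter()
print(counter["incr"](2))      # 2
try:
    counter["incr"]("evil")    # a hostile context is rejected...
except TypeError as e:
    print(e)                   # ...instead of corrupting the state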
@Article{Livshits:2013:TFA,
author = "Benjamin Livshits and Stephen Chong",
title = "Towards fully automatic placement of security
sanitizers and declassifiers",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "385--398",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429115",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A great deal of research on sanitizer placement,
sanitizer correctness, checking path validity, and
policy inference has been done in the last five to ten
years, involving type systems, static analysis, and
runtime monitoring and enforcement. However, in pretty
much all work thus far, the burden of sanitizer
placement has fallen on the developer. Yet sanitizer
placement in large-scale applications is difficult, and
developers are likely to make errors that create
security vulnerabilities. This paper
advocates a radically different approach: we aim to
fully automate the placement of sanitizers by analyzing
the flow of tainted data in the program. We argue that
developers are better off leaving out sanitizers
entirely instead of trying to place them. This paper
proposes a fully automatic technique for sanitizer
placement. Placement is static whenever possible,
switching to run time when necessary. Run-time taint
tracking techniques can be used to track the source of
a value, and thus apply appropriate sanitization.
However, due to the runtime overhead of run-time taint
tracking, our technique avoids it wherever possible.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
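The run-time half of such a scheme can be sketched in a few lines. A hedged Python illustration, not the paper's system (names hypothetical): values from untrusted sources carry a taint mark, and the sink applies the appropriate sanitizer itself whenever taint reaches it, relieving the developer of manual placement. Propagating taint through string operations, and placing sanitizers statically when the analysis permits, are the hard parts elided here.

import html

class Tainted(str):
    """Marks a string that originated from an untrusted source."""

def source(value):
    return Tainted(value)

def html_sink(value):
    # The runtime, not the developer, places the sanitizer: taint
    # reaching an HTML sink is escaped automatically.
    if isinstance(value, Tainted):
        value = html.escape(value)
    return "<div>" + value + "</div>"

print(html_sink(source("<script>alert(1)</script>")))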
@Article{Goodman:2013:PPP,
author = "Noah D. Goodman",
title = "The principles and practice of probabilistic
programming",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "399--402",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429117",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Gordon:2013:MLP,
author = "Andrew D. Gordon and Mihhail Aizatulin and Johannes
Borgstrom and Guillaume Claret and Thore Graepel and
Aditya V. Nori and Sriram K. Rajamani and Claudio
Russo",
title = "A model-learner pattern for {Bayesian} reasoning",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "403--416",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429119",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A Bayesian model is based on a pair of probability
distributions, known as the prior and sampling
distributions. A wide range of fundamental machine
learning tasks, including regression, classification,
clustering, and many others, can all be seen as
Bayesian models. We propose a new probabilistic
programming abstraction, a typed Bayesian model, which
is based on a pair of probabilistic expressions for the
prior and sampling distributions. A sampler for a model
is an algorithm to compute synthetic data from its
sampling distribution, while a learner for a model is
an algorithm for probabilistic inference on the model.
Models, samplers, and learners form a generic
programming pattern for model-based inference. They
support the uniform expression of common tasks
including model testing, and generic compositions such
as mixture models, evidence-based model averaging, and
mixtures of experts. A formal semantics supports
reasoning about model equivalence and implementation
correctness. By developing a series of examples and
three learner implementations based on exact inference,
factor graphs, and Markov chain Monte Carlo, we
demonstrate the broad applicability of this new
programming pattern.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
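The model-sampler-learner decomposition is easy to render concretely. A minimal Python sketch under stated assumptions (a Beta-Bernoulli coin model, so the learner can use exact conjugate inference; none of these names come from the paper):

import random

# A typed Bayesian model is a pair of probabilistic expressions:
# a prior over parameters and a sampling distribution over data.
def coin_prior():
    return random.betavariate(1.0, 1.0)      # uniform prior on bias

def coin_sample(bias):
    return 1 if random.random() < bias else 0

def sampler(prior, sample, n):
    # Forward direction: synthesize data from the model.
    theta = prior()
    return [sample(theta) for _ in range(n)]

def learner(data):
    # Inference direction, exact for this conjugate pair: the
    # posterior over the bias is Beta(1 + heads, 1 + tails).
    heads = sum(data)
    return (1.0 + heads, 1.0 + len(data) - heads)

synthetic = sampler(coin_prior, coin_sample, 100)
print("posterior Beta parameters:", learner(synthetic))

The same sampler and learner code works for any model expressed as such a pair, which is the genericity the pattern is after.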
@Article{Suenaga:2013:HPS,
author = "Kohei Suenaga and Hiroyoshi Sekine and Ichiro Hasuo",
title = "Hyperstream processing systems: nonstandard modeling
of continuous-time signals",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "417--430",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429120",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We exploit the apparent similarity between
(discrete-time) stream processing and (continuous-time)
signal processing and transfer a deductive verification
framework from the former to the latter. Our
development is based on rigorous semantics that relies
on nonstandard analysis (NSA). Specifically, we start
with a discrete framework consisting of a Lustre-like
stream processing language, its Kahn-style fixed point
semantics, and a program logic (in the form of a type
system) for partial correctness guarantees. This stream
framework is transferred as is to one for
hyperstreams---streams of streams, which typically arise
from sampling (continuous-time) signals with
progressively smaller intervals---via the logical
infrastructure of NSA. Under a certain continuity
assumption we identify hyperstreams with signals; our
final outcome thus obtained is a deductive verification
framework of signals. In it, one verifies properties of
signals using (conventionally discrete) proof
principles such as fixed-point induction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Vytiniotis:2013:HHL,
author = "Dimitrios Vytiniotis and Simon Peyton Jones and Koen
Claessen and Dan Ros{\'e}n",
title = "{HALO}: {Haskell} to logic through denotational
semantics",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "431--442",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429121",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Even well-typed programs can go wrong in modern
functional languages, by encountering a pattern-match
failure, or simply returning the wrong answer. An
increasingly-popular response is to allow programmers
to write contracts that express semantic properties,
such as crash-freedom or some useful post-condition. We
study the static verification of such contracts. Our
main contribution is a novel translation to first-order
logic of both Haskell programs, and contracts written
in Haskell, all justified by denotational semantics.
This translation enables us to prove that functions
satisfy their contracts using an off-the-shelf
first-order logic theorem prover.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Botincan:2013:SSL,
author = "Matko Botincan and Domagoj Babi{\'c}",
title = "{Sigma*}: symbolic learning of input-output
specifications",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "443--456",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429123",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Sigma*, a novel technique for learning
symbolic models of software behavior. Sigma* addresses
the challenge of synthesizing models of software by
using symbolic conjectures and abstraction. By
combining dynamic symbolic execution to discover
symbolic input-output steps of the programs and
counterexample guided abstraction refinement to
over-approximate program behavior, Sigma* transforms
arbitrary source representation of programs into
faithful input-output models. We define a class of
stream filters---programs that process streams of data
items---for which Sigma* converges to a complete model
if abstraction refinement eventually builds up a
sufficiently strong abstraction. In other words, Sigma*
is complete relative to abstraction. To represent
inferred symbolic models, we use a variant of symbolic
transducers that can be effectively composed and
equivalence checked. Thus, Sigma* enables fully
automatic analysis of behavioral properties such as
commutativity, reversibility, and idempotence, which is
useful for web sanitizer verification and stream-program
compiler optimizations, as we show
experimentally. We also show how models inferred by
Sigma* can boost performance of stream programs by
parallelized code generation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Bonchi:2013:CNE,
author = "Filippo Bonchi and Damien Pous",
title = "Checking {NFA} equivalence with bisimulations up to
congruence",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "457--468",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429124",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce bisimulation up to congruence as a
technique for proving language equivalence of
non-deterministic finite automata. Exploiting this
technique, we devise an optimisation of the classical
algorithm by Hopcroft and Karp. We compare our approach
to the recently introduced antichain algorithms, by
analysing and relating the two underlying coinductive
proof methods. We give concrete examples where we
exponentially improve over antichains; experimental
results moreover show non negligible improvements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
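For orientation, here is the classical on-the-fly check that the paper optimizes, as a small Python sketch (hypothetical names). It explores pairs of determinized state sets and stores visited pairs in a relation R; Bonchi and Pous's contribution is to replace the naive `(x, y) in R` test with membership in the congruence closure of R, which can discharge exponentially many pairs without ever enqueuing them.

from collections import deque

def nfa_language_equiv(init1, init2, accept, delta, alphabet):
    # Explore pairs of determinized state *sets*, built lazily.
    def step(states, a):
        return frozenset(q for s in states for q in delta.get((s, a), ()))

    def accepting(states):
        return any(s in accept for s in states)

    todo = deque([(frozenset(init1), frozenset(init2))])
    R = set()
    while todo:
        x, y = todo.popleft()
        if (x, y) in R:          # the test the paper strengthens
            continue
        if accepting(x) != accepting(y):
            return False
        R.add((x, y))
        for a in alphabet:
            todo.append((step(x, a), step(y, a)))
    return True

# Two NFAs over {a} that both accept odd-length strings.
delta = {(0, 'a'): {1}, (1, 'a'): {0},
         (2, 'a'): {3, 4}, (3, 'a'): {2}, (4, 'a'): {2}}
print(nfa_language_equiv({0}, {2}, {1, 3, 4}, delta, 'a'))   # True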
@Article{Koksal:2013:SBM,
author = "Ali Sinan Koksal and Yewen Pu and Saurabh Srivastava
and Rastislav Bodik and Jasmin Fisher and Nir
Piterman",
title = "Synthesis of biological models from mutation
experiments",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "469--482",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429125",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Executable biology presents new challenges to formal
methods. This paper addresses two problems that cell
biologists face when developing formally analyzable
models. First, we show how to automatically synthesize
a concurrent in-silico model for cell development given
in-vivo experiments of how particular mutations
influence the experiment outcome. The problem of
synthesis under mutations is unique because mutations
may produce non-deterministic outcomes (presumably by
introducing races between competing signaling pathways
in the cells) and the synthesized model must be able to
replay all these outcomes in order to faithfully
describe the modeled cellular processes. In contrast, a
``regular'' concurrent program is correct if it picks
any outcome allowed by the non-deterministic
specification. We developed synthesis algorithms and
synthesized a model of cell fate determination of the
nematode {\em C. elegans}. A version of this model
previously took systems biologists months to develop.
Second, we address the problem of under-constrained
specifications that arise due to incomplete sets of
mutation experiments. Under-constrained specifications
give rise to distinct models, each explaining the same
phenomenon differently. Addressing the ambiguity of
specifications corresponds to analyzing the space of
plausible models. We develop algorithms for detecting
ambiguity in specifications, i.e., whether there exist
alternative models that would produce different fates
on some unperformed experiment, and for removing
redundancy from specifications, i.e., computing minimal
non-ambiguous specifications. Additionally, we develop
a modeling language and embed it into Scala. We
describe how this language design and embedding allows
us to build an efficient synthesizer. For our {\em C.
elegans\/} case study, we infer two observationally
equivalent models expressing different biological
hypotheses through different protein interactions. One
of these hypotheses was previously unknown to
biologists.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Upadrasta:2013:SPS,
author = "Ramakrishna Upadrasta and Albert Cohen",
title = "Sub-polyhedral scheduling using
(unit-)two-variable-per-inequality polyhedra",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "483--496",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429127",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Polyhedral compilation has been successful in the
design and implementation of complex loop nest
optimizers and parallelizing compilers. The algorithmic
complexity and scalability limitations remain one
important weakness. We address it using sub-polyhedral
under-approximations of the systems of constraints
resulting from affine scheduling problems. We propose a
sub-polyhedral scheduling technique using
(Unit-)Two-Variable-Per-Inequality or (U)TVPI
Polyhedra. This technique relies on simple polynomial
time algorithms to under-approximate a general
polyhedron into (U)TVPI polyhedra. We modify the
state-of-the-art PLuTo compiler using our scheduling
technique, and show that for a majority of the
Polybench (2.0) kernels, the above under-approximations
yield polyhedra that are non-empty. Solving the
under-approximated system leads to asymptotic gains in
complexity, and shows practically significant
improvements when compared to a traditional LP solver.
We also verify that code generated by our
sub-polyhedral parallelization prototype matches the
performance of PLuTo-optimized code when the
under-approximation preserves feasibility.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Rompf:2013:ODS,
author = "Tiark Rompf and Arvind K. Sujeeth and Nada Amin and
Kevin J. Brown and Vojin Jovanovic and HyoukJoong Lee
and Manohar Jonnalagedda and Kunle Olukotun and Martin
Odersky",
title = "Optimizing data structures in high-level programs: new
directions for extensible compilers based on staging",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "497--510",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429128",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "High-level data structures are a cornerstone of modern
programming and at the same time stand in the way of
compiler optimizations. In order to reason about user-
or library-defined data structures, compilers need to be
extensible. Common mechanisms to extend compilers fall
into two categories. Frontend macros, staging or
partial evaluation systems can be used to
programmatically remove abstraction and specialize
programs before they enter the compiler. Alternatively,
some compilers allow extending the internal workings by
adding new transformation passes at different points in
the compile chain or adding new intermediate
representation (IR) types. None of these mechanisms
alone is sufficient to handle the challenges posed by
high level data structures. This paper shows a novel
way to combine them to yield benefits that are greater
than the sum of the parts. Instead of using staging
merely as a front end, we implement internal compiler
passes using staging as well. These internal passes
delegate back to program execution to construct the
transformed IR. Staging is known to simplify program
generation, and in the same way it can simplify program
transformation. Defining a transformation as a staged
IR interpreter is simpler than implementing a low-level
IR to IR transformer. With custom IR nodes, many
optimizations that are expressed as rewritings from IR
nodes to staged program fragments can be combined into
a single pass, mitigating phase ordering problems.
Speculative rewriting can preserve optimistic
assumptions around loops. We demonstrate several
powerful program optimizations using this architecture
that are particularly geared towards data structures: a
novel loop fusion and deforestation algorithm, array of
struct to struct of array conversion, object flattening
and code generation for heterogeneous parallel devices.
We validate our approach using several non-trivial case
studies that exhibit order-of-magnitude speedups in
experiments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Adams:2013:PPI,
author = "Michael D. Adams",
title = "Principled parsing for indentation-sensitive
languages: revisiting {Landin}'s offside rule",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "511--522",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429129",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Several popular languages, such as Haskell, Python,
and F\#, use the indentation and layout of code as part
of their syntax. Because context-free grammars cannot
express the rules of indentation, parsers for these
languages currently use ad hoc techniques to handle
layout. These techniques tend to be low-level and
operational in nature and forgo the advantages of more
declarative specifications like context-free grammars.
For example, they are often coded by hand instead of
being generated by a parser generator. This paper
presents a simple extension to context-free grammars
that can express these layout rules, and derives GLR
and LR(k) algorithms for parsing these grammars. These
grammars are easy to write and can be parsed
efficiently. Examples for several languages are
presented, as are benchmarks showing the practical
efficiency of these algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
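The offside rule that such grammars capture declaratively is usually implemented as an ad hoc layout pass. A minimal Python sketch of that conventional pass (the thing the paper replaces with grammar-level annotations; names hypothetical): leading whitespace is turned into INDENT/DEDENT tokens against a stack of open indentation levels.

def layout_tokens(lines):
    stack = [0]                      # open indentation levels
    for line in lines:
        if not line.strip():
            continue                 # blank lines carry no layout
        width = len(line) - len(line.lstrip(' '))
        if width > stack[-1]:        # deeper: open a block
            stack.append(width)
            yield ('INDENT', width)
        while width < stack[-1]:     # shallower: close blocks
            stack.pop()
            yield ('DEDENT', width)
        yield ('LINE', line.strip())
    while len(stack) > 1:            # close anything open at EOF
        stack.pop()
        yield ('DEDENT', 0)

src = ["f x =", "  let y = x", "      + 1", "  in y", "g = 0"]
for token in layout_tokens(src):
    print(token)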
@Article{Hobor:2013:RSD,
author = "Aquinas Hobor and Jules Villard",
title = "The ramifications of sharing in data structures",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "523--536",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429131",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programs manipulating mutable data structures with
intrinsic sharing present a challenge for modular
verification. Deep aliasing inside data structures
dramatically complicates reasoning in isolation over
parts of these objects because changes to one part of
the structure (say, the left child of a dag node) can
affect other parts (the right child or some of its
descendants) that may point into it. The result is that
finding intuitive and compositional proofs of
correctness is usually a struggle. We propose a
compositional proof system that enables local reasoning
in the presence of sharing. While the AI ``frame
problem'' elegantly captures the reasoning required to
verify programs without sharing, we contend that
natural reasoning about programs with sharing instead
requires an answer to a different and more challenging
AI problem, the ``ramification problem'': reasoning
about the indirect consequences of actions.
Accordingly, we present a RAMIFY proof rule that
attacks the ramification problem head-on and show how
to reason with it. Our framework is valid in any
separation logic and permits sound compositional and
local reasoning in the context of both specified and
unspecified sharing. We verify the correctness of a
number of examples, including programs that manipulate
dags, graphs, and overlaid data structures in
nontrivial ways.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Totla:2013:CIB,
author = "Nishant Totla and Thomas Wies",
title = "Complete instantiation-based interpolation",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "537--548",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429132",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Craig interpolation has been a valuable tool for
formal methods with interesting applications in program
analysis and verification. Modern SMT solvers implement
interpolation procedures for the theories that are most
commonly used in these applications. However, many
application-specific theories remain unsupported, which
limits the class of problems to which
interpolation-based techniques apply. In this paper, we
present a generic framework to build new interpolation
procedures via reduction to existing interpolation
procedures. We consider the case where an
application-specific theory can be formalized as an
extension of a base theory with additional symbols and
axioms. Our technique uses finite instantiation of the
extension axioms to reduce an interpolation problem in
the theory extension to one in the base theory. We
identify a model-theoretic criterion that allows us to
detect the cases where our technique is complete. We
discuss specific theories that are relevant in program
verification and that satisfy this criterion. In
particular, we obtain complete interpolation procedures
for theories of arrays and linked lists. The latter is
the first complete interpolation procedure for a theory
that supports reasoning about complex shape properties
of heap-allocated data structures. We have implemented
this procedure in a prototype on top of existing SMT
solvers and used it to automatically infer loop
invariants of list-manipulating programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Barr:2013:ADF,
author = "Earl T. Barr and Thanh Vo and Vu Le and Zhendong Su",
title = "Automatic detection of floating-point exceptions",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "549--560",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429133",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is well-known that floating-point exceptions can be
disastrous and writing exception-free numerical
programs is very difficult. Thus, it is important to
automatically detect such errors. In this paper, we
present Ariadne, a practical symbolic execution system
specifically designed and implemented for detecting
floating-point exceptions. Ariadne systematically
transforms a numerical program to explicitly check each
exception triggering condition. Ariadne symbolically
executes the transformed program using real arithmetic
to find candidate real-valued inputs that can reach and
trigger an exception. Ariadne converts each candidate
input into a floating-point number, then tests it
against the original program. In general, approximating
floating-point arithmetic with real arithmetic can
change paths from feasible to infeasible and vice
versa. The key insight of this work is that, for the
problem of detecting floating-point exceptions, this
approximation works well in practice because, if one
input reaches an exception, many are likely to, and at
least one of them will do so over both floating-point
and real arithmetic. To realize Ariadne, we also
devised a novel, practical linearization technique to
solve nonlinear constraints. We extensively evaluated
Ariadne over 467 scalar functions in the widely used
GNU Scientific Library (GSL). Our results show that
Ariadne is practical and identifies a large number of
real runtime exceptions in GSL. The GSL developers
confirmed our preliminary findings and look forward to
Ariadne's public release, which we plan to do in the
near future.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
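Ariadne's first step, making each exception-triggering condition an explicit branch, can be shown by hand for one operation. A hedged Python sketch (not Ariadne's implementation; names hypothetical): exp overflows exactly when its argument exceeds log(DBL_MAX), so the transformed program checks that condition explicitly, and a candidate real-valued solution is then validated on the actual floating-point input.

import math
import sys

LOG_MAX = math.log(sys.float_info.max)   # about 709.78 for doubles

def exp_checked(x):
    # The exception-triggering condition is made an explicit branch,
    # which a symbolic executor can then try to reach over the reals.
    if x > LOG_MAX:
        raise OverflowError("exp overflows for x = %r" % x)
    return math.exp(x)

# Validate one candidate real solution of `x > LOG_MAX` as a float:
try:
    exp_checked(710.0)
except OverflowError as e:
    print("exception confirmed on the floating-point input:", e)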
@Article{Ley-Wild:2013:SAS,
author = "Ruy Ley-Wild and Aleksandar Nanevski",
title = "Subjective auxiliary state for coarse-grained
concurrency",
journal = j-SIGPLAN,
volume = "48",
number = "1",
pages = "561--574",
month = jan,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480359.2429134",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:03 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "From Owicki-Gries' Resource Invariants and Jones'
Rely/Guarantee to modern variants based on Separation
Logic, axiomatic logics for concurrency require
auxiliary state to explicitly relate the effect of all
threads to the global invariant on the shared resource.
Unfortunately, auxiliary state gives the proof of an
individual thread access to the auxiliaries of all
other threads. This makes proofs sensitive to the
global context, which prevents local reasoning and
compositionality. To tame this historical difficulty of
auxiliary state, we propose subjective auxiliary state,
whereby each thread is verified using a self view
(i.e., the thread's effect on the shared resource) and
an other view (i.e., the collective effect of all the
other threads). Subjectivity generalizes auxiliary
state from stacks and heaps to user-chosen partial
commutative monoids, which can eliminate the dependence
on the global thread structure. We employ subjectivity
to formulate Subjective Concurrent Separation Logic as
a combination of subjective auxiliary state and
Concurrent Separation Logic. The logic yields simple,
compositional proofs of coarse-grained concurrent
programs that use auxiliary state, and scales to
support higher-order recursive procedures that can
themselves fork new threads. We prove the soundness of
the logic with a novel denotational semantics of action
trees and a definition of safety using rely/guarantee
transitions over a large subjective footprint. We have
mechanized the denotational semantics, logic,
metatheory, and a number of examples by a shallow
embedding in Coq.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '13 conference proceedings.",
}
@Article{Miller:2013:TSG,
author = "Mark Miller",
title = "A tested semantics for getters, setters, and eval in
{JavaScript}",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "1--16",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384579",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present S5, a semantics for the strict mode of the
ECMAScript 5.1 (JavaScript) programming language. S5
shrinks the large source language into a manageable
core through an implemented transformation. The
resulting specification has been tested against
real-world conformance suites for the language. This
paper focuses on two aspects of S5: accessors (getters
and setters) and eval. Since these features are complex
and subtle in JavaScript, they warrant special study.
Variations on both features are found in several other
programming languages, so their study is likely to have
broad applicability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
@Article{Homer:2013:POG,
author = "Michael Homer and James Noble and Kim B. Bruce and
Andrew P. Black and David J. Pearce",
title = "Patterns as objects in {Grace}",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "17--28",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384581",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Object orientation and pattern matching are often seen
as conflicting approaches to program design.
Object-oriented programs place type-dependent behavior
inside objects and invoke it via dynamic dispatch,
while pattern-matching programs place type-dependent
behavior outside data structures and invoke it via
multiway conditionals (case statements). Grace is a
new, dynamic, object-oriented language designed to
support teaching: to this end, Grace needs to support
both styles. We explain how this conflict can be
resolved gracefully: by modelling patterns and cases as
partial functions, reifying those functions as objects,
and then building up complex patterns from simpler ones
using pattern combinators. We describe the
implementation of this design as an object-oriented
framework, and a case study of its effectiveness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
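Reifying patterns as objects with a partial match method, and composing them with combinators, can be sketched compactly. A Python illustration of the idea, not Grace code (all class names hypothetical):

class Pattern:
    # A reified partial function: match returns bindings or None.
    def match(self, value):
        raise NotImplementedError
    def __and__(self, other):
        return Both(self, other)
    def __or__(self, other):
        return Either(self, other)

class TypeP(Pattern):
    def __init__(self, ty):
        self.ty = ty
    def match(self, v):
        return {} if isinstance(v, self.ty) else None

class Bind(Pattern):
    def __init__(self, name):
        self.name = name
    def match(self, v):
        return {self.name: v}

class Both(Pattern):
    def __init__(self, p, q):
        self.p, self.q = p, q
    def match(self, v):
        m1 = self.p.match(v)
        if m1 is None:
            return None
        m2 = self.q.match(v)
        return None if m2 is None else {**m1, **m2}

class Either(Pattern):
    def __init__(self, p, q):
        self.p, self.q = p, q
    def match(self, v):
        m = self.p.match(v)
        return m if m is not None else self.q.match(v)

pat = TypeP(int) & Bind("n")     # combinators build complex patterns
print(pat.match(42))             # {'n': 42}
print(pat.match("hi"))           # None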
@Article{Bloom:2013:RSP,
author = "Bard Bloom and Martin J. Hirzel",
title = "Robust scripting via patterns",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "29--40",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384582",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic typing in scripting languages is a two-edged
sword. On the one hand, it can be more flexible and
more concise than static typing. On the other hand, it
can lead to less robust code. We argue that patterns
can give scripts much of the robustness of static
typing, without losing the flexibility and concision of
dynamic typing. To make this case, we describe a rich
pattern system in the dynamic language Thorn. Thorn
patterns interact with its control constructs and
scoping rules to support concise and robust
test-and-extract idioms. Thorn patterns encompass an
extensive set of features from ML-style patterns to
regular expressions and beyond. And Thorn patterns can
be first-class and support pattern-punning (mirror
constructor syntax). Overall, this paper describes a
powerful pattern system that makes scripting more
robust.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
@Article{Normark:2013:OOP,
author = "Kurt N{\o}rmark and Lone Leth Thomsen and Bent
Thomsen",
title = "Object-oriented programming with gradual abstraction",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "41--52",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384583",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe an experimental object-oriented
programming language, ASL2, that supports program
development by means of a series of abstraction steps.
The language allows immediate object construction, and
it is possible to use the constructed objects for
concrete problem solving tasks. Classes and class
hierarchies can be derived from the objects --- via
gradual abstraction steps. We introduce two levels of
object classification, called weak and strong object
classification. Strong object classification relies on
conventional classes, whereas weak object
classification is looser and less restrictive. As a
central mechanism, weakly classified objects are
allowed to borrow methods from each other. ASL2
supports class generalization, as a counterpart to
class specialization and inheritance in mainstream
object-oriented programming languages. The final
abstraction step discussed in this paper is a
syntactical abstraction step that derives a source file
with a syntactical class form.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
@Article{Pignotti:2013:ADP,
author = "Alessandro Pignotti and Adam Welc and Bernd Mathiske",
title = "Adaptive data parallelism for {Internet} clients on
heterogeneous platforms",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "53--62",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384585",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today's Internet is long past static web pages filled
with HTML-formatted text sprinkled with an occasional
image or animation. We have entered an era of Rich
Internet Applications executed locally on Internet
clients such as web browsers: games, physics engines,
image rendering, photo editing, etc. Yet today's
languages used to program Internet clients have limited
ability to tap into the computational capabilities of the
underlying, often heterogeneous, platforms. In this
paper we present how a Domain-Specific Language (DSL)
can be integrated into ActionScript, one of the most
popular scripting languages used to program Internet
clients and a close cousin of JavaScript. We
demonstrate how our DSL, called ASDP (ActionScript Data
Parallel), can be used to enable data parallelism for
existing sequential programs. We also present a
prototype of a system where data parallel workloads can
be executed on either a CPU or a GPU, with the runtime
system transparently selecting the best processing
unit, depending on the type of workload as well as the
architecture and current load of the execution
platform. We evaluate performance of our system on a
variety of benchmarks, representing different types of
workloads: physics, image processing, scientific
computing and cryptography.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
@Article{Ardo:2013:LAO,
author = "H{\aa}kan Ard{\"o} and Carl Friedrich Bolz and Maciej
Fija{\l}kowski",
title = "Loop-aware optimizations in {PyPy}'s tracing {JIT}",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "63--72",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384586",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "One of the nice properties of a tracing just-in-time
compiler (JIT) is that many of its optimizations are
simple, requiring one forward pass only. This is not
true for loop-invariant code motion, which is a very
important optimization for code with tight kernels,
especially for dynamic languages that typically perform
a lot of loop-invariant type checking, boxed-value
unwrapping, and virtual method lookups. In this
paper we explain a scheme pioneered within the context
of the LuaJIT project for making basic optimizations
loop-aware by using a simple pre-processing step on the
trace without changing the optimizations themselves. We
have implemented the scheme in RPython's tracing JIT
compiler. PyPy's Python JIT executing simple numerical
kernels can become up to two times faster, bringing the
performance into the ballpark of static language
compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
@Article{Wurthinger:2013:SOA,
author = "Thomas W{\"u}rthinger and Andreas W{\"o}{\ss} and
Lukas Stadler and Gilles Duboscq and Doug Simon and
Christian Wimmer",
title = "Self-optimizing {AST} interpreters",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "73--82",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384587",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "An abstract syntax tree (AST) interpreter is a simple
and natural way to implement a programming language.
However, it is also considered the slowest approach
because of the high overhead of virtual method
dispatch. Language implementers therefore define
bytecodes to speed up interpretation, at the cost of
introducing inflexible and hard-to-maintain bytecode
formats. We present a novel approach to implementing
AST interpreters in which the AST is modified during
interpretation to incorporate type feedback. This tree
rewriting is a general and powerful mechanism to
optimize many constructs common in dynamic programming
languages. Our system is implemented in Java and uses
the static typing and primitive data types of Java
elegantly to avoid the cost of boxed representations of
primitive values in dynamic programming languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
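The node-rewriting idea can be miniaturized. A Python sketch, assuming a toy expression AST (not the paper's Java framework): an Add node starts generic, observes its operand types on first execution, swaps in a specialized implementation, and falls back when the speculation later fails.

class Var:
    def __init__(self, name):
        self.name = name
    def execute(self, env):
        return env[self.name]

class Add:
    def __init__(self, left, right):
        self.left, self.right = left, right
        self.impl = self._generic    # the slot that gets rewritten

    def execute(self, env):
        return self.impl(env)

    def _generic(self, env):
        l, r = self.left.execute(env), self.right.execute(env)
        if isinstance(l, int) and isinstance(r, int):
            self.impl = self._int_add     # specialize on type feedback
        return l + r

    def _int_add(self, env):
        l, r = self.left.execute(env), self.right.execute(env)
        if not (isinstance(l, int) and isinstance(r, int)):
            self.impl = self._generic     # speculation failed: fall back
        return l + r

tree = Add(Var("x"), Var("y"))
print(tree.execute({"x": 1, "y": 2}))     # specializes to _int_add
print(tree.execute({"x": 1.5, "y": 2}))   # deoptimizes to _generic

Rewriting an implementation slot stands in here for replacing the node in the tree; the effect, monomorphic fast paths with a generic fallback, is the same.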
@Article{Wernli:2013:OFC,
author = "Erwann Wernli and Pascal Maerki and Oscar Nierstrasz",
title = "Ownership, filters and crossing handlers: flexible
ownership in dynamic languages",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "83--94",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384589",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Sharing mutable objects can result in broken
invariants, exposure of internal details, and other
subtle bugs. To prevent such issues, it is important to
control accessibility and aliasing of objects. Dynamic
Ownership is an effective way to do so, but its
owner-as-dominator discipline is too restrictive:
objects are either accessible or not. We propose in
this paper to control accessibility and aliasing with
more flexibility using two mechanisms, filters and
crossing handlers. We demonstrate the benefits of the
flexibility offered by these mechanisms, and report on
the adaptation of a Smalltalk web server with our
approach. We conclude that our variant of dynamic
ownership is flexible enough to accommodate an existing
design, while at the same time constraining it enough
to highlight design anomalies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
@Article{Lerner:2013:DCA,
author = "Benjamin S. Lerner and Dan Grossman",
title = "Detecting conflicts among declarative {UI}
extensions",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "95--106",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384590",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We examine overlays, a flexible aspect-like mechanism
for third-party declarative extensions of declarative
UIs. Overlays can be defined for any markup language
and permit extensions to define new content that is
dynamically woven into a base UI document. While
powerful, overlays are inherently non-modular and may
conflict with each other, by defining duplicate or
contradictory UI components. We construct an abstract
language to capture core overlay semantics, and design
an automatic analysis to detect inter-extension
conflicts. We apply the analysis to a case study of
Firefox extensions, finding several real-world bugs.
Our analysis provides low-level feedback to extension
developers and high-level reports to end users.
Finally, we show how variants of overlays more
expressive than those of Firefox complicate conflict
detection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
@Article{Steinert:2013:COA,
author = "Bastian Steinert and Damien Cassou and Robert
Hirschfeld",
title = "{CoExist}: overcoming aversion to change",
journal = j-SIGPLAN,
volume = "48",
number = "2",
pages = "107--118",
month = feb,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480360.2384591",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:12 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmers make many changes to the program to
eventually find a good solution for a given task. In
this course of change, every intermediate development
state can be of value when, for example, a promising
idea suddenly turns out to be inappropriate or the
interplay of objects turns out to be more complex than
initially expected. Programmers would
benefit from tool support that provides immediate
access to source code and run-time of previous
development states of interest. We present IDE
extensions, implemented for Squeak/Smalltalk, to
preserve, retrieve, and work with this information.
With such tool support, programmers can work without
worries because they can rely on tools that help them
with whatever their explorations will reveal. They no
longer have to follow certain best practices only to
avoid undesired consequences of hanging code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '12 conference proceedings.",
}
@Article{Damiani:2013:FFD,
author = "Ferruccio Damiani and Luca Padovani and Ina Schaefer",
title = "A formal foundation for dynamic delta-oriented
software product lines",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "1--10",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371403",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Delta-oriented programming (DOP) is a flexible
approach for implementing software product lines
(SPLs). DOP SPLs are implemented by a code base (a set
of delta modules encapsulating changes to
object-oriented programs) and a product line
declaration (providing the connection of the delta
modules with the product features). In this paper, we
extend DOP by the capability to switch the implemented
product configuration at runtime and present a formal
foundation for dynamic DOP. A dynamic DOP SPL is a DOP
SPL with a dynamic reconfiguration graph that specifies
how to switch between different feature configurations.
Dynamic DOP supports (unanticipated) software evolution
such that at runtime, the product line declaration, the
code base and the dynamic reconfiguration graph can be
changed in any (unanticipated) way that preserves the
currently running product. The type system of our
dynamic DOP core calculus ensures that the dynamic
reconfigurations lead to type safe products and do not
cause runtime type errors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
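A drastically simplified rendering of the moving parts, as a Python sketch (hypothetical names; the paper's calculus reconfigures the running product in place and type-checks the result, whereas this toy simply rebuilds): delta modules are functions that patch a product, and a reconfiguration graph whitelists the runtime switches.

def delta_plain(product):
    product["greet"] = lambda: "hello"
    return product

def delta_add_logging(product):
    base = product["greet"]                    # change, not replace
    product["greet"] = lambda: "[log] " + base()
    return product

DELTAS = {"Plain": [delta_plain],
          "Logging": [delta_plain, delta_add_logging]}
RECONFIG = {"Plain": {"Logging"}, "Logging": {"Plain"}}   # graph edges

def build(config):
    product = {}
    for delta in DELTAS[config]:
        product = delta(product)
    return product

def switch(current, target):
    if target not in RECONFIG[current]:
        raise ValueError("switch not allowed by the reconfiguration graph")
    return target, build(target)

config, product = "Plain", build("Plain")
print(product["greet"]())                 # hello
config, product = switch(config, "Logging")
print(product["greet"]())                 # [log] hello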
@Article{Thum:2013:FBD,
author = "Thomas Th{\"u}m and Ina Schaefer and Sven Apel and
Martin Hentschel",
title = "Family-based deductive verification of software
product lines",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "11--20",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371404",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A software product line is a set of similar software
products that share a common code base. While software
product lines can be implemented efficiently using
feature-oriented programming, verifying each product
individually does not scale, especially if human effort
is required (e.g., as in interactive theorem proving).
We present a family-based approach of deductive
verification to prove the correctness of a software
product line efficiently. We illustrate and evaluate
our approach for software product lines written in a
feature-oriented dialect of Java and specified using
the Java Modeling Language. We show that the theorem
prover KeY can be used off-the-shelf for this task,
without any modifications. Compared to the individual
verification of each product, our approach reduces the
verification time needed for our case study by more
than 85\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
@Article{Ryssel:2013:RFM,
author = "Uwe Ryssel and Joern Ploennigs and Klaus Kabitzsch",
title = "Reasoning of feature models from derived features",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "21--30",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371405",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When using product lines whose variability models are
based on derived features (e.g., Simulink variant
objects), the dependencies among the features are only
described implicitly. This makes it difficult to verify
the mapping of the features to the solution space and
to create a comprehensive overview of the feature
dependencies like in a feature model. In this paper, an
OWL-based approach is presented, which permits the
automatic verification of the feature mapping and an
automatic feature model synthesis for derived features
using OWL reasoning and formal concept analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
@Article{Rayside:2013:SIA,
author = "Derek Rayside and Vajihollah Montaghami and Francesca
Leung and Albert Yuen and Kevin Xu and Daniel Jackson",
title = "Synthesizing iterators from abstraction functions",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "31--40",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371407",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A technique for synthesizing iterators from
declarative abstraction functions written in a
relational logic specification language is described.
The logic includes a transitive closure operator that
makes it convenient for expressing reachability queries
on linked data structures. Some optimizations,
including tuple elimination, iterator flattening, and
traversal state reduction, are used to improve
performance of the generated iterators. A case study
demonstrates that most of the iterators in the widely
used JDK Collections classes can be replaced with code
synthesized from declarative abstraction functions.
These synthesized iterators perform competitively with
the hand-written originals. In a user study the
synthesized iterators always passed more test cases
than the hand-written ones, were almost always as
efficient, usually took less programmer effort, and
were the qualitative preference of all participants who
provided free-form comments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
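The abstraction-function-to-iterator direction can be sketched for the simplest case, a singly linked list. A hedged Python illustration (not the paper's relational-logic synthesizer; names hypothetical): the declarative view is the set of values reachable through the transitive closure of the next field, and the "synthesized" iterator traverses that closure with reduced state, a single pointer.

class Node:
    def __init__(self, value, nxt=None):
        self.value, self.next = value, nxt

def abstraction(head):
    # Declarative view: values reachable from `head` through the
    # transitive closure of the `next` field (cycle-safe).
    seen, frontier = set(), [head]
    while frontier:
        n = frontier.pop()
        if n is not None and id(n) not in seen:
            seen.add(id(n))
            yield n.value
            frontier.append(n.next)

def synthesized_iter(head):
    # What synthesis would emit after traversal-state reduction:
    # the whole frontier collapses to a single pointer.
    node = head
    while node is not None:
        yield node.value
        node = node.next

lst = Node(1, Node(2, Node(3)))
print(sorted(abstraction(lst)), list(synthesized_iter(lst)))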
@Article{Hulette:2013:CTT,
author = "Geoffrey C. Hulette and Matthew Sottile and Allen D.
Malony",
title = "Composing typemaps in {Twig}",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "41--49",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371408",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Twig is a language for writing typemaps, programs
which transform the type of a value while preserving
its underlying meaning. Typemaps are typically used by
tools that generate code, such as multi-language
wrapper generators, to automatically convert types as
needed. Twig builds on existing typemap tools in a few
key ways. Twig's typemaps are composable so that
complex transformations may be built from simpler ones.
In addition, Twig incorporates an abstract, formal
model of code generation, allowing it to output code
for different target languages. We describe Twig's
formal semantics and show how the language allows us to
concisely express typemaps. Then, we demonstrate Twig's
utility by building an example typemap.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
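Composable typemaps have a tiny functional core. A Python sketch of that core only (hypothetical names; Twig's typemaps are additionally given a formal semantics and compiled to target-language code rather than executed directly):

def c_string_to_bytes(s):
    return s.encode("utf-8") + b"\x00"        # NUL-terminated C string

def bytes_to_length_prefixed(b):
    return len(b).to_bytes(4, "little") + b   # length-prefixed buffer

def compose(*typemaps):
    # Complex transformations are built from simpler ones.
    def composed(value):
        for t in typemaps:
            value = t(value)
        return value
    return composed

to_wire = compose(c_string_to_bytes, bytes_to_length_prefixed)
print(to_wire("hi"))    # b'\x03\x00\x00\x00hi\x00'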
@Article{Axelsen:2013:PTD,
author = "Eyvind W. Axelsen and Stein Krogdahl",
title = "{Package Templates}: a definition by
semantics-preserving source-to-source transformations
to efficient {Java} code",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "50--59",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371409",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Package Templates (PT) is a mechanism designed for
writing reusable modules, called templates, each
consisting of a set of classes that can be adapted to
their use in a program through compile-time
specialization. A template must be instantiated in a
program before its classes can be used. The mechanism
supports type-safe renaming, merging, type
parameterization and refinement in the form of static
additions and overrides that are orthogonal to the
corresponding concepts of ordinary inheritance. In this
paper, we consider PT as an extension to Java, and a PT
program will then consist of a number of Java packages
and templates, where templates are instantiated in
packages or other templates. Our aim and main
contribution is to define the meaning of such a
program, and to show that this definition is
consistent. We first show this for a core subset of PT,
C-PT, and define a set of source-to-source
transformations for converting C-PT programs to plain
Java programs using semantics we have described
informally in previous papers. We can then define the
meaning of a C-PT program in terms of the resulting
Java program. Thus, we have to verify that the
transformations will always convert a legal C-PT
program to a legal Java program. Finally, we briefly
discuss how this approach can be extended to full PT. A
main challenge is to preserve externally visible names
(for classes, methods and fields), and at the same time
prevent unwanted subsequent rebindings caused, e.g., by
overload resolution in the Java compiler. Names that
are bound to declarations in a template should not be
rebound to different declarations by subsequent
compositions or adaptations. In addition to defining the
runtime semantics of PT constructs in terms of their
translation to Java, the transformation rules can also
be seen as a high-level approach to how a compiler for
this language might be implemented.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
@Article{Spacek:2013:ISS,
author = "Petr Spacek and Christophe Dony and Chouki Tibermacine
and Luc Fabresse",
title = "An inheritance system for structural \& behavioral
reuse in component-based software programming",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "60--69",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371411",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the context of Component-based Programming, which
addresses the implementation stage of a component-based
software engineering development process, this paper
describes a specification and an operational
integration of an inheritance system into a
self-contained new component-based programming language
named Compo. Our proposal completes and extends related
works by making it possible to apply inheritance to the
full description of components, i.e. both to structural
(description of provisions and requirements, of
component architecture) and behavioral (full
implementations of services) parts in component
descriptions. Inheritance in Compo is designed to be
used in conjunction with composition to maximize reuse
capabilities and expressive power. The Compo implementation
proposes a clear operational solution for inheritance
and for achieving and testing substitutions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
@Article{Zhang:2013:TLC,
author = "Huaxi (Yulin) Zhang and Lei Zhang and Christelle
Urtado and Sylvain Vauttier and Marianne Huchard",
title = "A three-level component model in component based
software development",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "70--79",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371412",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Component-based development promotes a software
development process that focuses on component reuse.
How to describe a desired component before searching in
the repository? How to find an existing component that
fulfills the required functionalities? How to capture
the system personalization based on its constitutive
components' customization? To answer these questions,
this paper claims that components should be described
using three different forms at three development
stages: architecture specification, configuration and
assembly. However, no architecture description language
proposes such a detailed description for components
that supports such a three-step component-based
development. This paper proposes a three-level ADL,
named Dedal, that enables the explicit and separate
definitions of component roles, component classes, and
component instances.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
@Article{Freeman:2013:HLW,
author = "John Freeman and Jaakko J{\"a}rvi and Gabriel Foust",
title = "{HotDrink}: a library for {Web} user interfaces",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "80--83",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371413",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "HotDrink is a JavaScript library for constructing
forms, dialogs, and other common user interfaces for
Web applications. With HotDrink, instead of writing
event handlers, developers declare a ``view-model'' in
JavaScript and a set of ``bindings'' between the
view-model and the HTML elements comprising the view.
These specifications tend to be small, but they are
enough for HotDrink to provide a fully operational GUI
with multi-way dataflows, enabling/disabling of values,
activation/deactivation of commands, and data
validation. HotDrink implements these rich behaviors,
expected of high-quality user interfaces, as generic
reusable algorithms. This paper/tool demonstration
introduces developers to the HotDrink library by
stepping through the construction of an example web
application GUI. The library is a concrete realization
of our prior work on the ``property models'' approach
to declarative GUI programming. To encourage adoption
among developers, we have packaged the technology
following established web programming conventions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
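
The distinctive behavior here is multi-way dataflow: a constraint owns
several methods, and the system runs one that does not overwrite the
variable the user just edited. A toy Python analogue (HotDrink itself is a
JavaScript library; this is not its API, and the view-model is invented):

    # One constraint (total == price * qty) with two methods; after an edit,
    # re-satisfy the constraint by a method that writes some other variable.
    vm = {"price": 4.0, "qty": 2.0, "total": 8.0}

    methods = {
        "total": lambda: vm["price"] * vm["qty"],    # dataflow toward total
        "qty":   lambda: vm["total"] / vm["price"],  # dataflow toward qty
    }

    def edit(var, value):
        vm[var] = value
        out = next(o for o in methods if o != var)   # don't clobber the edit
        vm[out] = methods[out]()

    edit("total", 20.0)    # user edits the total; qty is recomputed
    assert vm["qty"] == 5.0
    edit("qty", 10.0)      # user edits qty; total is recomputed
    assert vm["total"] == 40.0
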
@Article{Riche:2013:PSA,
author = "T. L. Rich{\'e} and R. Gon{\c{c}}alves and B. Marker
and D. Batory",
title = "Pushouts in software architecture design",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "84--92",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371415",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A classical approach to program derivation is to
progressively extend a simple specification and then
incrementally refine it to an implementation. We claim
this approach is hard or impractical when reverse
engineering legacy software architectures. We present a
case study that shows optimizations and pushouts --- in
addition to refinements and extensions --- are essential
for practical stepwise development of complex software
architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
@Article{Bagheri:2013:PSD,
author = "Hamid Bagheri and Kevin Sullivan",
title = "{Pol}: specification-driven synthesis of architectural
code frameworks for platform-based applications",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "93--102",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371416",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Developing applications that use complex platforms for
functionalities such as authentication and messaging is
hard. Model-driven engineering promises to help, but
transformation systems are themselves hard to produce.
We contribute a new approach using constraint-based
synthesis of partial code frameworks that developers
complete by hand without the need for hand-coded
transformation systems. Rather, synthesis is driven by
formal, partial specifications of target platforms and
application architectures, and by design (code)
fragments encoding application-specific platform usage
patterns. We present results of an early evaluation
using the case study method to test hypotheses of
feasibility and potential industrial utility, using a
laboratory model of a nationwide health information
network as a subject system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
@Article{Bauer:2013:FPA,
author = "Tim Bauer and Martin Erwig and Alan Fern and Jervis
Pinto",
title = "Faster program adaptation through reward attribution
inference",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "103--111",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371417",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the adaptation-based programming (ABP) paradigm,
programs may contain variable parts (function calls,
parameter values, etc.) that can take a number of
different values. Programs also contain reward
statements with which a programmer can provide feedback
about how well a program is performing with respect to
achieving its goals (for example, achieving a high
score on some scale). By repeatedly running the
program, a machine learning component will, guided by
the rewards, gradually adjust the automatic choices
made in the variable program parts so that they
converge toward an optimal strategy. ABP is a method
for semi-automatic program generation in which the
choices and rewards offered by programmers allow
standard machine-learning techniques to explore a
design space defined by the programmer to find an
optimal instance of a program template. ABP effectively
provides a DSL that allows non-machine-learning experts
to exploit machine learning to generate self-optimizing
programs. Unfortunately, in many cases the placement
and structuring of choices and rewards can have a
detrimental effect on how an optimal solution to a
program-generation problem can be found. To address
this problem, we have developed a dataflow analysis
that computes influence tracks of choices and rewards.
This information can be exploited by an augmented
machine-learning technique to ignore misleading rewards
and to generally attribute rewards better to the
choices that have actually influenced them. Moreover,
this technique allows us to detect errors in the
adaptive program that might arise out of program
maintenance. Our evaluation shows that the dataflow
analysis can lead to improvements in performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
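
The paradigm reads naturally as a bandit problem: a program point exposes a
choice, a reward statement scores each run, and repeated runs let a learner
converge on the best alternative. A self-contained sketch with synthetic
rewards and invented names (not the paper's system or its improved
reward-attribution analysis):

    import random

    def run_program(choice):
        """The adaptive program: the reward depends on the choice taken."""
        return {"fast_path": 1.0, "slow_path": 0.1}[choice] + random.gauss(0, 0.05)

    values = {"fast_path": 0.0, "slow_path": 0.0}
    counts = {"fast_path": 0, "slow_path": 0}

    for _ in range(500):
        if random.random() < 0.1:              # explore occasionally
            choice = random.choice(list(values))
        else:                                  # otherwise exploit the best
            choice = max(values, key=values.get)
        reward = run_program(choice)
        counts[choice] += 1
        values[choice] += (reward - values[choice]) / counts[choice]  # running mean

    print(max(values, key=values.get))         # almost surely 'fast_path'
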
@Article{Efftinge:2013:XID,
author = "Sven Efftinge and Moritz Eysholdt and Jan K{\"o}hnlein
and Sebastian Zarnekow and Robert von Massow and
Wilhelm Hasselbring and Michael Hanus",
title = "{Xbase}: implementing domain-specific languages for
{Java}",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "112--121",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371419",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Xtext is an open-source framework for implementing
external, textual domain-specific languages (DSLs). So
far, most DSLs implemented with Xtext and similar tools
focus on structural aspects such as service
specifications and entities. Because behavioral aspects
are significantly more complicated to implement, they
are often delegated to general-purpose programming
languages. This approach introduces complex integration
patterns and the DSL's high level of abstraction is
compromised. We present Xbase as part of Xtext, an
expression language that can be reused via language
inheritance in any DSL implementation based on Xtext.
Xbase expressions provide both control structures and
program expressions in a uniform way. Xbase is
statically typed and tightly integrated with the Java
type system. Languages extending Xbase inherit the
syntax of a Java-like expression language as well as
language infrastructure components, including a parser,
an unparser, a linker, a compiler and an interpreter.
Furthermore, the framework provides integration into
the Eclipse IDE including debug and refactoring
support. The application of Xbase is presented by means
of a domain model language which serves as a tutorial
example and by the implementation of the programming
language Xtend. Xtend is a functional and
object-oriented general purpose language for the Java
Virtual Machine (JVM). It is built on top of Xbase
which is the reusable expression language that is the
foundation of Xtend.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
@Article{Rafkind:2013:HSE,
author = "Jon Rafkind and Matthew Flatt",
title = "{Honu}: syntactic extension for algebraic notation
through enforestation",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "122--131",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371420",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Honu is a new language that fuses traditional
algebraic notation (e.g., infix binary operators) with
Scheme-style language extensibility. A key element of
Honu's design is an enforestation parsing step, which
converts a flat stream of tokens into an
S-expression-like tree, in addition to the initial
``read'' phase of parsing and interleaved with the
``macro-expand'' phase. We present the design of Honu,
explain its parsing and macro-extension algorithm, and
show example syntactic extensions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
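
Enforestation is, at its core, precedence parsing run as a distinct phase
between reading and macro expansion. The flavor of that step (not Honu's
actual algorithm, which cooperates with macro expansion) fits in a small
precedence-climbing parser:

    # Turn a flat token list with infix operators into a nested tree.
    PREC = {"+": 1, "-": 1, "*": 2, "/": 2}

    def enforest(tokens, min_prec=0):
        """Precedence climbing; operands are assumed atomic for brevity."""
        tree = tokens.pop(0)
        while tokens and tokens[0] in PREC and PREC[tokens[0]] >= min_prec:
            op = tokens.pop(0)
            rhs = enforest(tokens, PREC[op] + 1)   # bind tighter on the right
            tree = (op, tree, rhs)
        return tree

    assert enforest(["1", "+", "2", "*", "3"]) == ("+", "1", ("*", "2", "3"))
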
@Article{Walkingshaw:2013:CMI,
author = "Eric Walkingshaw and Martin Erwig",
title = "A calculus for modeling and implementing variation",
journal = j-SIGPLAN,
volume = "48",
number = "3",
pages = "132--140",
month = mar,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2480361.2371421",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:18 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a formal calculus for modeling and
implementing variation in software. It unifies the
compositional and annotative approaches to feature
implementation and supports the development of
abstractions that can be used to directly relate
feature models to their implementation. Since the
compositional and annotative approaches are
complementary, the calculus enables implementers to use
the best combination of tools for the job and focus on
inherent feature interactions, rather than those
introduced by biases in the representation. The
calculus also supports the abstraction of recurring
variational patterns and provides a metaprogramming
platform for organizing variation in artifacts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '12 conference proceedings.",
}
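
The annotative half of such a calculus can be shown directly: a choice node
tags two alternatives with a dimension, and a selection of dimension/tag
pairs resolves them. A tiny interpreter in that spirit (not the paper's
formal definition; the syntax is invented):

    def select(expr, choices):
        """Resolve choice nodes ('Dim', left, right) by choices: dim -> 0 or 1."""
        if isinstance(expr, tuple) and expr[0] in choices:
            dim, left, right = expr
            return select((left, right)[choices[dim]], choices)
        if isinstance(expr, list):                    # ordinary syntax node
            return [select(e, choices) for e in expr]
        return expr

    # twice(x) = x + x   versus   twice(x) = 2 * x, varying in dimension "Impl"
    prog = ["define", "twice", ("Impl", ["+", "x", "x"], ["*", "2", "x"])]
    assert select(prog, {"Impl": 1}) == ["define", "twice", ["*", "2", "x"]]
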
@Article{Bond:2013:GDG,
author = "Michael Bond",
title = "{GPUDet}: a deterministic {GPU} architecture",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "1--12",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451118",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Nondeterminism is a key challenge in developing
multithreaded applications. Even with the same input,
each execution of a multithreaded program may produce a
different output. This behavior complicates debugging
and limits one's ability to test for correctness. This
non-reproducibility situation is aggravated on
massively parallel architectures like graphics
processing units (GPUs) with thousands of concurrent
threads. We believe providing a deterministic
environment to ease debugging and testing of GPU
applications is essential to enable a broader class of
software to use GPUs. Many hardware and software
techniques have been proposed for providing determinism
on general-purpose multi-core processors. However,
these techniques are designed for small numbers of
threads. Scaling them to thousands of threads on a GPU
is a major challenge. This paper proposes a scalable
hardware mechanism, GPUDet, to provide determinism in
GPU architectures. In this paper we characterize the
existing deterministic and nondeterministic aspects of
current GPU execution models, and we use these
observations to inform GPUDet's design. For example,
GPUDet leverages the inherent determinism of the SIMD
hardware in GPUs to provide determinism within a
wavefront at no cost. GPUDet also exploits the Z-Buffer
Unit, an existing GPU hardware unit for graphics
rendering, to allow parallel out-of-order memory writes
to produce a deterministic output. Other optimizations
in GPUDet include deterministic parallel execution of
atomic operations and a workgroup-aware algorithm that
eliminates unnecessary global synchronizations. Our
simulation results indicate that GPUDet incurs only 2X
slowdown on average over a baseline nondeterministic
architecture, with runtime overheads as low as 4\% for
compute-bound applications, despite running GPU kernels
with thousands of threads. We also characterize the
sources of overhead for deterministic execution on GPUs
to provide insights for further optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Sung:2013:DEH,
author = "Hyojin Sung and Rakesh Komuravelli and Sarita V.
Adve",
title = "{DeNovoND}: efficient hardware support for disciplined
non-determinism",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "13--26",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451119",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent work has shown that disciplined shared-memory
programming models that provide
deterministic-by-default semantics can simplify both
parallel software and hardware. Specifically, the
DeNovo hardware system has shown that the software
guarantees of such models (e.g., data-race-freedom and
explicit side-effects) can enable simpler, higher
performance, and more energy-efficient hardware than
the current state-of-the-art for deterministic
programs. Many applications, however, contain
non-deterministic parts; e.g., using lock
synchronization. For commercial hardware to exploit the
benefits of DeNovo, it is therefore necessary to extend
DeNovo to support non-deterministic applications. This
paper proposes DeNovoND, a system that supports
lock-based, disciplined non-determinism, with the
simplicity, performance, and energy benefits of DeNovo.
We use a combination of distributed queue-based locks
and access signatures to implement simple memory
consistency semantics for safe non-determinism, with a
coherence protocol that does not require transient
states, invalidation traffic, or directories, and does
not incur false sharing. The resulting system is
simpler, shows comparable or better execution time, and
has 33\% less network traffic on average (translating
directly into energy savings) relative to a
state-of-the-art invalidation-based protocol for 8
applications designed for lock synchronization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
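
Access signatures, one of the two ingredients named above, are compact,
conservative summaries of the addresses a critical section touched,
typically Bloom-filter style. A software sketch of the idea (the paper's
signatures are hardware structures; the hash functions here are arbitrary):

    def hashes(addr):
        """Two arbitrary hash functions into a 64-bit signature."""
        return (addr * 2654435761 % 64, (addr ^ (addr >> 7)) % 64)

    class Signature:
        def __init__(self):
            self.bits = 0
        def add(self, addr):
            for h in hashes(addr):
                self.bits |= 1 << h
        def may_contain(self, addr):
            # Conservative: no false negatives, occasional false positives.
            return all(self.bits >> h & 1 for h in hashes(addr))

    sig = Signature()
    sig.add(0x1000)
    assert sig.may_contain(0x1000)    # recorded addresses always hit
    print(sig.may_contain(0x2004))    # False here; may be True for others
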
@Article{Wester:2013:PDR,
author = "Benjamin Wester and David Devecsery and Peter M. Chen
and Jason Flinn and Satish Narayanasamy",
title = "Parallelizing data race detection",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "27--38",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451120",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Detecting data races in multithreaded programs is a
crucial part of debugging such programs, but
traditional data race detectors are too slow to use
routinely. This paper shows how to speed up race
detection by spreading the work across multiple cores.
Our strategy relies on uniparallelism, which executes
time intervals of a program (called epochs) in
parallel to provide scalability, but executes all
threads from a single epoch on a single core to
eliminate locking overhead. We use several techniques
to make parallelization effective: dividing race
detection into three phases, predicting a subset of the
analysis state, eliminating sequential work via
transitive reduction, and reducing the work needed to
maintain multiple versions of analysis via
factorization. We demonstrate our strategy by
parallelizing a happens-before detector and a
lockset-based detector. We find that uniparallelism can
significantly speed up data race detection. With 4x the
number of cores as the original application, our
strategy speeds up the median execution time by 4.4x
for a happens-before detector and 3.3x for a lockset
race detector. Even on the same number of cores as the
conventional detectors, the ability for uniparallelism
to elide analysis locks allows it to reduce the median
overhead by 13\% for a happens-before detector and 8\%
for a lockset detector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
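
For readers unfamiliar with the foundation of the first detector, the
happens-before race check over vector clocks takes only a few lines (a
textbook illustration, not the paper's parallelized three-phase pipeline):

    def hb_before(c1, c2):
        """True if vector clock c1 happens-before c2."""
        return all(a <= b for a, b in zip(c1, c2)) and c1 != c2

    def races(c1, c2):
        """Two conflicting accesses race if neither is ordered before the other."""
        return not hb_before(c1, c2) and not hb_before(c2, c1)

    write_t0 = [1, 0]   # thread 0's clock at its write
    write_t1 = [0, 1]   # thread 1's clock at its write; no synchronization
    assert races(write_t0, write_t1)
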
@Article{Lucia:2013:CEF,
author = "Brandon Lucia and Luis Ceze",
title = "Cooperative empirical failure avoidance for
multithreaded programs",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "39--50",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451121",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrency errors in multithreaded programs are
difficult to find and fix. We propose Aviso, a system
for avoiding schedule-dependent failures. Aviso
monitors events during a program's execution and, when
a failure occurs, records a history of events from the
failing execution. It uses this history to generate
schedule constraints that perturb the order of events
in the execution and thereby avoids schedules that lead
to failures in future program executions. Aviso
leverages scenarios where many instances of the same
software run, using a statistical model of program
behavior and experimentation to determine which
constraints most effectively avoid failures. After
implementing Aviso, we showed that it decreased failure
rates for a variety of important desktop, server, and
cloud applications by orders of magnitude, with an
average overhead of less than 20\% and, in some cases,
as low as 5\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Goiri:2013:PGM,
author = "{\'I}{\~n}igo Goiri and William Katsak and Kien Le and
Thu D. Nguyen and Ricardo Bianchini",
title = "{Parasol} and {GreenSwitch}: managing datacenters
powered by renewable energy",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "51--64",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451123",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Several companies have recently announced plans to
build ``green'' datacenters, i.e. datacenters partially
or completely powered by renewable energy. These
datacenters will either generate their own renewable
energy or draw it directly from an existing nearby
plant. Besides reducing carbon footprints, renewable
energy can potentially reduce energy costs, reduce peak
power costs, or both. However, certain renewable sources
are intermittent, which requires approaches for tackling
the variability of the energy supply. One approach is
to use batteries and/or the electrical grid as a backup
for the renewable energy. It may also be possible to
adapt the workload to match the renewable energy
supply. For highest benefits, green datacenter
operators must intelligently manage their workloads and
the sources of energy at their disposal. In this paper,
we first discuss the tradeoffs involved in building
green datacenters today and in the future. Second, we
present Parasol, a prototype green datacenter that we
have built as a research platform. Parasol comprises a
small container, a set of solar panels, a battery bank,
and a grid-tie. Third, we describe GreenSwitch, our
model-based approach for dynamically scheduling the
workload and selecting the source of energy to use. Our
real experiments with Parasol, GreenSwitch, and
MapReduce workloads demonstrate that intelligent
workload and energy source management can produce
significant cost reductions. Our results also isolate
the cost implications of peak power management, storing
energy on the grid, and the ability to delay the
MapReduce jobs. Finally, our results demonstrate that
careful workload and energy source management can
minimize the negative impact of electrical grid
outages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
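
The scheduling question GreenSwitch answers can be pictured with a
deliberately naive greedy policy: cover each slot's load with solar first,
then battery, then grid. All numbers below are invented, and the paper's
approach is model-based, also weighing peak-power charges and delay-tolerant
jobs:

    def plan(load, solar, battery):
        """Per-slot grid draw after using solar, then battery (kWh)."""
        grid = []
        for need, sun in zip(load, solar):
            from_sun = min(need, sun)
            from_batt = min(need - from_sun, battery)
            battery -= from_batt
            grid.append(need - from_sun - from_batt)
        return grid

    # Four time slots of load versus solar supply, with 2 kWh of battery.
    print(plan(load=[3, 3, 3, 3], solar=[0, 2, 4, 1], battery=2))  # [1, 1, 0, 2]
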
@Article{Shen:2013:PCF,
author = "Kai Shen and Arrvindh Shriraman and Sandhya Dwarkadas
and Xiao Zhang and Zhuan Chen",
title = "Power containers: an {OS} facility for fine-grained
power and energy management on multicore servers",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "65--76",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451124",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy efficiency and power capping are critical
concerns in server and cloud computing systems. They
face growing challenges due to dynamic power variations
from new client-directed web applications, as well as
complex behaviors due to multicore resource sharing and
hardware heterogeneity. This paper presents a new
operating system facility called ``power containers''
that accounts for and controls the power and energy
usage of individual fine-grained requests in multicore
servers. This facility relies on three key techniques
--- (1) online model that attributes multicore power
(including shared maintenance power) to concurrently
running tasks, (2) alignment of actual power
measurements and model estimates to enable online model
recalibration, and (3) on-the-fly
application-transparent request tracking in multi-stage
servers to isolate the power and energy contributions
and customize per-request control. Our mechanisms
enable new multicore server management capabilities
including fair power capping that only penalizes
power-hungry requests, and energy-aware request
distribution between heterogeneous servers. Our
evaluation uses three multicore processors (Intel
Woodcrest, Westmere, and SandyBridge) and a variety of
server and cloud computing (Google App Engine)
workloads. Our results demonstrate the high accuracy of
our request power accounting (no more than 11\% errors)
and the effectiveness of container-enabled power virus
isolation and throttling. Our request distribution case
study shows up to 25\% energy saving compared to an
alternative approach that recognizes machine
heterogeneity but not fine-grained workload affinity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
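
The first listed technique, attributing shared power to concurrently running
tasks, can be illustrated with a deliberately simple activity-share model
(the model and all numbers are invented; the paper's online model is richer
and is recalibrated against actual measurements):

    def attribute(total_watts, idle_watts, events_per_task):
        """Split measured power across tasks by share of counted activity."""
        dynamic = total_watts - idle_watts
        total_events = sum(events_per_task.values())
        return {task: idle_watts / len(events_per_task) + dynamic * e / total_events
                for task, e in events_per_task.items()}

    # 40 W measured, 10 W idle; request A caused 70% of the counted events.
    print(attribute(40.0, 10.0, {"req_a": 7_000_000, "req_b": 3_000_000}))
    # {'req_a': 26.0, 'req_b': 14.0}
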
@Article{Delimitrou:2013:PQA,
author = "Christina Delimitrou and Christos Kozyrakis",
title = "{Paragon}: {QoS}-aware scheduling for heterogeneous
datacenters",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "77--88",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451125",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large-scale datacenters (DCs) host tens of thousands
of diverse applications each day. However, interference
between colocated workloads and the difficulty of
matching applications to one of the many available
hardware platforms can degrade performance, violating the
quality of service (QoS) guarantees that many cloud
workloads require. While previous work has identified
the impact of heterogeneity and interference, existing
solutions are computationally intensive, cannot be
applied online, and do not scale beyond a few
applications. We present Paragon, an online and
scalable DC scheduler that is heterogeneity- and
interference-aware. Paragon is derived from robust
analytical methods; instead of profiling each
application in detail, it leverages information the
system already has about applications it has previously
seen. It uses collaborative filtering techniques to
quickly and accurately classify an unknown, incoming
workload with respect to heterogeneity and interference
in multiple shared resources, by identifying
similarities to previously scheduled applications. The
classification allows Paragon to greedily schedule
applications in a manner that minimizes interference
and maximizes server utilization. Paragon scales to
tens of thousands of servers with marginal scheduling
overheads in terms of time or state. We evaluate
Paragon with a wide range of workload scenarios, on
both small and large-scale systems, including 1,000
servers on EC2. For a 2,500-workload scenario, Paragon
enforces performance guarantees for 91\% of
applications, while significantly improving
utilization. In comparison, heterogeneity-oblivious,
interference-oblivious and least-loaded schedulers only
provide similar guarantees for 14\%, 11\% and 3\% of
workloads. The differences are more striking in
oversubscribed scenarios where resource efficiency is
more critical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
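
The collaborative-filtering step can be sketched with an SVD-based
latent-factor model, the same machinery as recommender systems (synthetic,
exactly rank-one data; this is not Paragon's actual classifier):

    import numpy as np

    # Rows: known workloads; columns: platforms; entries: observed scores.
    R = np.array([[1.0, 2.0, 3.0],
                  [2.0, 4.0, 6.0],
                  [3.0, 6.0, 9.0]])

    U, s, Vt = np.linalg.svd(R, full_matrices=False)
    k = 1                                        # rank of the latent model
    low_rank = (U[:, :k] * s[:k]).dot(Vt[:k])    # best rank-k approximation

    # A new workload profiled on platform 0 only; scale the nearest latent
    # profile to estimate its scores on the unprofiled platforms.
    observed = 1.5
    scale = observed / low_rank[0, 0]
    print(low_rank[0] * scale)                   # approx. [1.5, 3.0, 4.5]
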
@Article{Tang:2013:RRS,
author = "Lingjia Tang and Jason Mars and Wei Wang and Tanima
Dey and Mary Lou Soffa",
title = "{ReQoS}: reactive static\slash dynamic compilation for
{QoS} in warehouse scale computers",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "89--100",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451126",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As multicore processors with expanding core counts
continue to dominate the server market, the overall
utilization of the class of datacenters known as
warehouse scale computers (WSCs) depends heavily on
colocation of multiple workloads on each server to take
advantage of the computational power provided by modern
processors. However, many of the applications running
in WSCs, such as Web search, are user-facing and have
quality of service (QoS) requirements. When multiple
applications are co-located on a multicore machine,
contention for shared memory resources threatens
application QoS as severe cross-core performance
interference may occur. WSC operators are left with two
options: either disregard QoS to maximize WSC
utilization, or disallow the co-location of
high-priority user-facing applications with other
applications, resulting in low machine utilization and
millions of dollars wasted. This paper presents ReQoS,
a static/dynamic compilation approach that enables
low-priority applications to adaptively manipulate
their own contentiousness to ensure the QoS of
high-priority co-runners. ReQoS is composed of a
profile guided compilation technique that identifies
and inserts markers in contentious code regions in
low-priority applications, and a lightweight runtime
that monitors the QoS of high-priority applications and
reactively reduces the pressure low-priority
applications generate to the memory subsystem when
cross-core interference is detected. In this work, we
show that ReQoS can accurately diagnose contention and
significantly reduce performance interference to ensure
application QoS. Applying ReQoS to SPEC2006 and
SmashBench workloads on real multicore machines, we are
able to improve machine utilization by more than 70\%
in many cases, and more than 50\% on average, while
enforcing a 90\% QoS threshold. We are also able to
improve the energy efficiency of modern multicore
machines by 47\% on average over a policy of
disallowing co-locations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Arulraj:2013:PRS,
author = "Joy Arulraj and Po-Chun Chang and Guoliang Jin and
Shan Lu",
title = "Production-run software failure diagnosis via hardware
performance counters",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "101--112",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451128",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Sequential and concurrency bugs are widespread in
deployed software. They cause severe failures and huge
financial loss during production runs. Tools that
diagnose production-run failures with low overhead are
needed. The state-of-the-art diagnosis techniques use
software instrumentation to sample program properties
at run time and use off-line statistical analysis to
identify properties most correlated with failures.
Although promising, these techniques suffer from high
run-time overhead, which is sometimes over 100\%, for
concurrency-bug failure diagnosis and hence are not
suitable for production-run usage. We present PBI, a
system that uses existing hardware performance counters
to diagnose production-run failures caused by
sequential and concurrency bugs with low overhead. PBI
is designed based on several key observations. First, a
few widely supported performance counter events can
reflect a wide variety of common software bugs and can
be monitored by hardware with almost no overhead.
Second, the counter overflow interrupt supported by
existing hardware and operating systems provides a
natural and effective mechanism to conduct event
sampling at user level. Third, the noise and
non-determinism in interrupt delivery complement
statistical processing well. We evaluate PBI using 13
real-world concurrency and sequential bugs from
representative open-source server, client, and utility
programs, and 10 bugs from a widely used
software-testing benchmark. Quantitatively, PBI can
effectively diagnose failures caused by these bugs with
a small overhead that is never higher than 10\%.
Qualitatively, PBI does not require any change to
software and presents a novel use of existing hardware
performance counters.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Zhang:2013:CFC,
author = "Wei Zhang and Marc de Kruijf and Ang Li and Shan Lu
and Karthikeyan Sankaralingam",
title = "{ConAir}: featherweight concurrency bug recovery via
single-threaded idempotent execution",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "113--126",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451129",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many concurrency bugs are hidden in deployed software
and cause severe failures for end-users. When they
finally manifest and become known by developers, they
are difficult to fix correctly. To support end-users,
we need techniques that help software survive hidden
concurrency bugs during production runs. To help
developers, we need techniques that fix exposed
concurrency bugs. The state-of-the-art techniques on
concurrency-bug fixing and survival only satisfy a
subset of four important properties: compatibility,
correctness, generality, and performance. We aim to
develop a system that satisfies all of these four
properties. To achieve this goal, we leverage two
observations: (1) rolling back a single thread is
sufficient to recover from most concurrency-bug
failures; (2) reexecuting an idempotent region, which
requires no memory-state checkpoint, is sufficient to
recover from many concurrency-bug failures. Our system
ConAir includes a static analysis component that
automatically identifies potential failure sites, a
static analysis component that automatically identifies
the idempotent code regions around every failure site,
and a code-transformation component that inserts
rollback-recovery code around the identified idempotent
regions. We evaluated ConAir on 10 real-world
concurrency bugs in widely used C/C++ open-source
applications. These bugs cover different types of
failure symptoms and root causes. Quantitatively,
ConAir helps software survive failures caused by all of
these bugs with negligible run-time overhead ($< 1\%$)
and short recovery time. Qualitatively, ConAir can help
recover from failures caused by unknown bugs. It
guarantees that program semantics remain unchanged and
requires no change to operating systems or hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
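
The second observation, that an idempotent region can be re-executed with no
memory checkpoint, is the heart of the design. A Python analogue of the
inserted recovery logic (ConAir transforms C/C++ at compile time; the region
and names below are hypothetical):

    def read_pair(shared, retries=100):
        """An idempotent region: it only reads shared state, so when the
        consistency check at the failure site fails, it is simply re-run."""
        for _ in range(retries):               # bounded rollback-recovery
            a = shared["x"]
            b = shared["y"]
            if a == b:                         # failure site: racy torn reads
                return a, b
            # another thread interleaved between the reads; re-execute
        raise RuntimeError("unrecoverable after bounded retries")

    print(read_pair({"x": 7, "y": 7}))         # (7, 7)
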
@Article{Viennot:2013:TMR,
author = "Nicolas Viennot and Siddharth Nair and Jason Nieh",
title = "Transparent mutable replay for multicore debugging and
patch validation",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "127--138",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451130",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Dora, a mutable record-replay system which
allows a recorded execution of an application to be
replayed with a modified version of the application.
This feature, not available in previous record-replay
systems, enables powerful new functionality. In
particular, Dora can help reproduce, diagnose, and fix
software bugs by replaying a version of a recorded
application that is recompiled with debugging
information, reconfigured to produce verbose log
output, modified to include additional print
statements, or patched to fix a bug. Dora uses
lightweight operating system mechanisms to record an
application execution by capturing nondeterministic
events to a log without imposing unnecessary timing and
ordering constraints. It replays the log using a
modified version of the application even in the
presence of added, deleted, or modified operations that
do not match events in the log. Dora searches for a
replay that minimizes differences between the log and
the replayed execution of the modified program. If
there are no modifications, Dora provides deterministic
replay of the unmodified program. We have implemented a
Linux prototype which provides transparent mutable
replay without recompiling or relinking applications.
We show that Dora is useful for reproducing,
diagnosing, and fixing software bugs in real-world
applications, including Apache and MySQL. Our results
show that Dora (1) captures bugs and replays them with
applications modified or reconfigured to produce
additional debugging output for root cause diagnosis,
(2) captures exploits and replays them with patched
applications to validate that the patches successfully
eliminate vulnerabilities, (3) records production
workloads and replays them with patched applications to
validate patches with realistic workloads, and (4)
maintains low recording overhead on commodity multicore
hardware, making it suitable for production systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Sahoo:2013:ULI,
author = "Swarup Kumar Sahoo and John Criswell and Chase Geigle
and Vikram Adve",
title = "Using likely invariants for automated software fault
localization",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "139--152",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451131",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose an automatic diagnosis technique for
isolating the root cause(s) of software failures. We
use likely program invariants, automatically generated
using correct inputs that are close to the
fault-triggering input, to select a set of candidate
program locations which are possible root causes. We
then trim the set of candidate root causes using
software-implemented dynamic backwards slicing, plus
two new filtering heuristics: dependence filtering, and
filtering via multiple failing inputs that are also
close to the failing input. Experimental results on
reported software bugs of three large open-source
servers show that we are able to narrow down the number
of candidate bug locations to between 5 and 17 program
expressions, even in programs that are hundreds of
thousands of lines long.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
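
Likely invariants of the simplest kind, value ranges observed at program
points during passing runs, already convey the flavor of the diagnosis step
(a toy sketch, not the paper's invariant generator or its slicing and
filtering machinery):

    def learn(passing_runs):
        """passing_runs: list of {site: value}. Returns {site: (lo, hi)}."""
        inv = {}
        for run in passing_runs:
            for site, v in run.items():
                lo, hi = inv.get(site, (v, v))
                inv[site] = (min(lo, v), max(hi, v))
        return inv

    def candidates(invariants, failing_run):
        """Sites whose value in the failing run violates a learned range."""
        return [site for site, v in failing_run.items()
                if site in invariants
                and not invariants[site][0] <= v <= invariants[site][1]]

    inv = learn([{"len": 3, "idx": 0}, {"len": 5, "idx": 4}])
    print(candidates(inv, {"len": 4, "idx": 9}))   # ['idx'], the likely root cause
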
@Article{Paulos:2013:REA,
author = "Eric Paulos",
title = "The rise of the expert amateur: {DIY} culture and the
evolution of computer science",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "153--154",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451133",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We are at an important technological inflection point.
Most of our computing systems have been designed and
built by professionally trained experts (i.e. us ---
computer scientists, engineers, and designers) for use
in specific domains and to solve explicit problems.
Artifacts often called ``user manuals'' traditionally
prescribed the appropriate usage of these tools and
implied an acceptable etiquette for interaction and
experience. A fringe group of individuals usually
labeled ``hackers'' or ``amateurs'' or ``makers'' have
challenged this producer-consumer model of technology
by creating novel hardware and software features to
``improve'' our research and products while a similar
creative group of technicians called ``artists'' have
redirected the techniques, tools, and tenets of
accepted technological usage away from their typical
manifestations in practicality and product. Over time
the technological artifacts of these fringe groups and
the support for their rhetoric have gained them a
foothold in computing culture and eroded the
established power discontinuities within the practice
of computing research. We now expect our computing
tools to be driven by an architecture of open
participation and democracy that encourages users to
add value to their tools and applications as they use
them. Similarly, the bar for enabling the design of
novel, personal computing systems and ``hardware
remixes'' has fallen to the point where many
non-experts and novices are readily embracing and
creating fascinating and ingenious computing artifacts
outside of our official and traditionally sanctioned
academic and industrial research communities. But how
have we as ``expert'' practitioners been influencing
this discussion? By constructing a practice around the
design and development of technology for task based and
problem solving applications, we have unintentionally
established such work as the status quo for the human
computing experience. We have failed in our duty to
open up alternate forums for technology to express
itself and touch our lives beyond productivity and
efficiency. Blinded by our quest for ``smart
technologies'' we have forgotten to contemplate the
design of technologies to inspire us to be smarter,
more curious, and more inquisitive. We owe it to
ourselves to rethink the impact we desire to have on
this historic moment in computing culture. We must
choose to participate in and perhaps lead a dialogue
that heralds an expansive new acceptable practice of
designing to enable participation by experts and
non-experts alike. We are in the milieu of the rise of
the ``expert amateur''. We must change our mantra ---
not just performance, completeness, and usability but
openness, usefulness and relevancy to our world, its
citizens, and our environment. This talk will explore
elements of the DIY and maker culture and its relevancy
to research questions across computational hardware,
languages, and systems. Ultimately, this talk will
outline and argue for expanding the design territory
and potential opportunities for all of us to
collaborate and benefit as a society from this cultural
movement.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Raghavan:2013:CSH,
author = "Arun Raghavan and Laurel Emurian and Lei Shao and
Marios Papaefthymiou and Kevin P. Pipe and Thomas F.
Wenisch and Milo M. K. Martin",
title = "Computational sprinting on a hardware\slash software
testbed",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "155--166",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451135",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "CMOS scaling trends have led to an inflection point
where thermal constraints (especially in mobile devices
that employ only passive cooling) preclude sustained
operation of all transistors on a chip --- a phenomenon
called ``dark silicon.'' Recent research proposed
computational sprinting --- exceeding sustainable
thermal limits for short intervals --- to improve
responsiveness in light of the bursty computation
demands of many media-rich interactive mobile
applications. Computational sprinting improves
responsiveness by activating reserve cores (parallel
sprinting) and/or boosting frequency/voltage (frequency
sprinting) to power levels that far exceed the system's
sustainable cooling capabilities, relying on thermal
capacitance to buffer heat. Prior work analyzed the
feasibility of sprinting through modeling and
simulation. In this work, we investigate sprinting
using a hardware/software testbed. First, we study
unabridged sprints, wherein the computation completes
before temperature becomes critical, demonstrating a
6.3x responsiveness gain, and a 6\% energy efficiency
improvement by racing to idle. We then analyze
truncated sprints, wherein our software runtime system
must intervene to prevent overheating by throttling
parallelism and frequency before the computation is
complete. To avoid oversubscription penalties (context
switching inefficiencies after a truncated parallel
sprint), we develop a sprint-aware task-based parallel
runtime. We find that maximal-intensity sprinting is
not always best, introduce the concept of sprint
pacing, and evaluate an adaptive policy for selecting
sprint intensity. We report initial results using a
phase change heat sink to extend maximum sprint
duration. Finally, we demonstrate that a
sprint-and-rest operating regime can actually
outperform thermally-limited sustained execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
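
The feasibility argument rests on simple thermal arithmetic: heat
accumulates in the package's thermal capacitance at the excess power rate,
so the sprint budget is the buffered energy divided by that excess. A
back-of-envelope calculation with invented numbers, not the paper's
measurements:

    C = 30.0         # thermal capacitance of die plus spreader, J/K (assumed)
    dT = 20.0        # temperature headroom before the limit, K (assumed)
    p_sprint = 16.0  # sprint power, W (assumed)
    p_sustain = 4.0  # sustainable, coolable power, W (assumed)

    # The buffer of C*dT joules absorbs the excess (p_sprint - p_sustain) watts.
    sprint_seconds = C * dT / (p_sprint - p_sustain)
    print(sprint_seconds)   # 50.0: a long burst at 4x the sustainable power
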
@Article{Ahn:2013:DAS,
author = "Wonsun Ahn and Yuelu Duan and Josep Torrellas",
title = "{DeAliaser}: alias speculation using atomic region
support",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "167--180",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451136",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Alias analysis is a critical component in many
compiler optimizations. A promising approach to reduce
the complexity of alias analysis is to use speculation.
The approach consists of performing optimizations
assuming the alias relationships that are true most of
the time, and repairing the code when such
relationships are found not to hold through runtime
checks. This paper proposes a general alias speculation
scheme that leverages upcoming hardware support for
transactions with the help of some ISA extensions. The
ability of transactions to checkpoint and roll back
frees the compiler to pursue aggressive optimizations
without having to worry about recovery code. Also,
exposing the memory conflict detection hardware in
transactions to software allows runtime checking of
aliases with little or no overhead. We test the
potential of the novel alias speculation approach with
Loop Invariant Code Motion (LICM), Global Value
Numbering (GVN), and Partial Redundancy Elimination
(PRE) optimization passes. On average, they are shown
to reduce program execution time by 9\% in SPEC FP2006
applications and 3\% in SPEC INT2006 applications over
the alias analysis of a state-of-the-art compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Park:2013:RCH,
author = "Heekwon Park and Seungjae Baek and Jongmoo Choi and
Donghee Lee and Sam H. Noh",
title = "Regularities considered harmful: forcing randomness to
memory accesses to reduce row buffer conflicts for
multi-core, multi-bank systems",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "181--192",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451137",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a novel kernel-level memory allocator,
called M$^3$ (M-cube, Multi-core Multi-bank Memory
allocator), that has the following two features. First,
it introduces and makes use of a notion of a memory
container, which is defined as a unit of memory that
comprises the minimum number of page frames that can
cover all the banks of the memory organization, by
exclusively assigning a container to a core so that
each core achieves bank parallelism as much as
possible. Second, it orchestrates page frame allocation
so that pages that threads access are dispersed
randomly across multiple banks so that each thread's
access pattern is randomized. The development of M$^3$
is based on a tool that we develop to fully understand
the architectural characteristics of the underlying
memory organization. Using an extension of this tool,
we observe that the same application that accesses
pages in a random manner outperforms one that accesses
pages in a regular pattern such as sequential or
same-ordered accesses. This is because randomized
accesses reduce inter-thread access interference on
the row buffer in memory banks. We implement M$^3$ in
the Linux kernel version 2.6.32 on an Intel Xeon
system that has 16 cores and 32 GB of DRAM. Performance
evaluation with various workloads shows that M$^3$
improves the overall performance of memory-intensive
benchmarks by up to 85\%, with an average of about
40\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
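
The effect the allocator exploits is easy to demonstrate: if the
frame-to-bank map takes low bits of the frame number, a strided allocation
keeps hitting the same few banks, while random placement spreads accesses
out. A toy illustration (the mapping below is assumed, not the one the
paper's probing tool would report):

    import random

    BANKS = 8
    def bank_of(frame):
        """Assumed mapping: low bits of the page-frame number pick the bank."""
        return frame % BANKS

    regular = [bank_of(f) for f in range(0, 16, 2)]              # stride-2 frames
    randomized = [bank_of(random.randrange(1 << 20)) for _ in range(8)]

    print(regular)      # [0, 2, 4, 6, 0, 2, 4, 6]: half the banks, more conflicts
    print(randomized)   # typically spread across all 8 banks
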
@Article{Honarmand:2013:CUA,
author = "Nima Honarmand and Nathan Dautenhahn and Josep
Torrellas and Samuel T. King and Gilles Pokam and
Cristiano Pereira",
title = "{Cyrus}: unintrusive application-level record-replay
for replay parallelism",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "193--206",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451138",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Architectures for deterministic record-replay (R\&R)
of multithreaded code are attractive for program
debugging, intrusion analysis, and fault-tolerance
uses. However, very few of the proposed designs have
focused on maximizing replay speed --- a key enabling
property of these systems. The few efforts that focus
on replay speed require intrusive hardware or software
modifications, or target whole-system R\&R rather than
the more useful application-level R\&R. This paper
presents the first hardware-based scheme for
unintrusive, application-level R\&R that explicitly
targets high replay speed. Our scheme, called Cyrus,
requires no modification to commodity snoopy cache
coherence. It introduces the concept of an on-the-fly
software Backend Pass during recording which, as the
log is being generated, transforms it for high replay
parallelism. This pass also fixes up the log, and can
flexibly trade off replay parallelism for log size. We
analyze the performance of Cyrus using full system (OS
plus hardware) simulation. Our results show that Cyrus
has negligible recording overhead. In addition, for
8-processor runs of SPLASH-2, Cyrus attains an average
replay parallelism of 5, and a replay speed that is, on
average, only about 50\% lower than the recording
speed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{deOliveira:2013:WYS,
author = "Augusto Born de Oliveira and Sebastian Fischmeister
and Amer Diwan and Matthias Hauswirth and Peter F.
Sweeney",
title = "Why you should care about quantile regression",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "207--218",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451140",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Research has shown that correctly conducting and
analysing computer performance experiments is
difficult. This paper investigates what is necessary to
conduct successful computer performance evaluation by
attempting to repeat a prior experiment: the comparison
between two Linux schedulers. In our efforts, we found
that exploring an experimental space through a series
of incremental experiments can be inconclusive, and
there may be no indication of how much experimentation
will be enough. Analysis of variance (ANOVA), a
traditional analysis method, is able to partly solve
the problems with the previous approach, but we
demonstrate that ANOVA can be insufficient for proper
analysis due to the requirements it imposes on the
data. Finally, we demonstrate the successful
application of quantile regression, a recent
development in statistics, to computer performance
experiments. Quantile regression can provide more
insight into the experiment than ANOVA, with the
additional benefit of being applicable to data from any
distribution. This property makes it especially useful
in our field, since non-normally distributed data is
common in computer experiments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
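As a concrete illustration of the deOliveira:2013:WYS argument, the sketch
below uses Python with the statsmodels quantile-regression API on
synthetic, right-skewed run times; the scheduler factor, sample size, and
exponential noise are invented for the example and are not the paper's
Linux-scheduler data. Unlike ANOVA or OLS, which model the mean and lean on
normality assumptions, fitting several quantiles shows how a factor's
effect can differ across the distribution of run times.

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(0)
n = 500
sched = rng.integers(0, 2, n)   # 0 = scheduler A, 1 = scheduler B (synthetic)
# Right-skewed run times: a small shift plus heavy-tailed noise, as is
# common in computer performance experiments.
runtime = 100.0 + 2.0 * sched + rng.exponential(scale=20.0, size=n)
df = pd.DataFrame({"runtime": runtime, "sched": sched})

ols = smf.ols("runtime ~ sched", df).fit()
print("OLS (mean) effect: %.2f" % ols.params["sched"])
for q in (0.25, 0.50, 0.90):
    fit = smf.quantreg("runtime ~ sched", df).fit(q=q)
    print("quantile %.2f effect: %.2f" % (q, fit.params["sched"]))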
@Article{Curtsinger:2013:SSS,
author = "Charlie Curtsinger and Emery D. Berger",
title = "{STABILIZER}: statistically sound performance
evaluation",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "219--228",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451141",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Researchers and software developers require effective
performance evaluation. Researchers must evaluate
optimizations or measure overhead. Software developers
use automatic performance regression tests to discover
when changes improve or degrade performance. The
standard methodology is to compare execution times
before and after applying changes. Unfortunately,
modern architectural features make this approach
unsound. Statistically sound evaluation requires
multiple samples to test whether one can or cannot
(with high confidence) reject the null hypothesis that
results are the same before and after. However, caches
and branch predictors make performance dependent on
machine-specific parameters and the exact layout of
code, stack frames, and heap objects. A single binary
constitutes just one sample from the space of program
layouts, regardless of the number of runs. Since
compiler optimizations and code changes also alter
layout, it is currently impossible to distinguish the
impact of an optimization from that of its layout
effects. This paper presents Stabilizer, a system that
enables the use of the powerful statistical techniques
required for sound performance evaluation on modern
architectures. Stabilizer forces executions to sample
the space of memory configurations by repeatedly
re-randomizing layouts of code, stack, and heap objects
at runtime. Stabilizer thus makes it possible to
control for layout effects. Re-randomization also
ensures that layout effects follow a Gaussian
distribution, enabling the use of statistical tests
like ANOVA. We demonstrate Stabilizer's efficiency
($< 7\%$ median overhead) and its effectiveness by
evaluating the impact of LLVM's optimizations on the
SPEC CPU2006 benchmark suite. We find that, while -O2
has a significant impact relative to -O1, the
performance impact of -O3 over -O2 optimizations is
indistinguishable from random noise.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
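The Curtsinger:2013:SSS abstract argues that each binary layout is a single
sample, and that re-randomizing layouts makes layout effects Gaussian so
standard tests apply. The minimal Python/SciPy sketch below shows the kind
of test that methodology enables: a two-sample test over many independently
randomized layouts rather than many runs of one fixed layout. The run times
here are synthetic stand-ins, not Stabilizer output.

import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
# Hypothetical mean run times, one per independently re-randomized layout.
o2_runs = 10.00 + rng.normal(0.0, 0.30, 30)   # built at -O2
o3_runs = 9.97 + rng.normal(0.0, 0.30, 30)    # built at -O3
t, p = stats.ttest_ind(o2_runs, o3_runs, equal_var=False)  # Welch's t-test
verdict = "significant" if p < 0.05 else "indistinguishable from noise"
print("t = %.2f, p = %.3f -> %s" % (t, p, verdict))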
@Article{Gidra:2013:SSS,
author = "Lokesh Gidra and Ga{\"e}l Thomas and Julien Sopena and
Marc Shapiro",
title = "A study of the scalability of stop-the-world garbage
collectors on multicores",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "229--240",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451142",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large-scale multicore architectures create new
challenges for garbage collectors (GCs). In particular,
throughput-oriented stop-the-world algorithms
demonstrate good performance with a small number of
cores, but have been shown to degrade badly beyond
approximately 8 cores on a 48-core machine with OpenJDK
7. This negative result raises the question of whether the
stop-the-world design has intrinsic limitations that
would require a radically different approach. Our study
suggests that the answer is no, and that there is no
compelling scalability reason to discard the existing
highly-optimised throughput-oriented GC code on
contemporary hardware. This paper studies the default
throughput-oriented garbage collector of OpenJDK 7,
called Parallel Scavenge. We identify its bottlenecks,
and show how to eliminate them using well-established
parallel programming techniques. On the SPECjbb2005,
SPECjvm2008 and DaCapo 9.12 benchmarks, the improved GC
matches the performance of Parallel Scavenge at low
core count, but scales well, up to 48 cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{McFarlin:2013:DDO,
author = "Daniel S. McFarlin and Charles Tucker and Craig
Zilles",
title = "Discerning the dominant out-of-order performance
advantage: is it speculation or dynamism?",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "241--252",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451143",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we set out to study the performance
advantages of an Out-of-Order (OOO) processor relative
to in-order processors with similar execution
resources. In particular, we try to tease apart the
performance contributions from two sources: the
improved schedules enabled by OOO hardware
speculation support and its ability to generate
different schedules on different occurrences of the
same instructions based on operand and functional unit
availability. We find that the ability to express good
static schedules achieves the bulk of the speedup
resulting from OOO. Specifically, of the 53\% speedup
achieved by OOO relative to a similarly provisioned
in-order machine, we find that 88\% of that speedup can be
achieved by using a single ``best'' static schedule as
suggested by observing an OOO schedule of the code. We
discuss the ISA mechanisms that would be required to
express these static schedules. Furthermore, we find
that the benefits of dynamism largely come from two
kinds of events that influence the application's
critical path: load instructions that miss in the cache
only part of the time and branch mispredictions. We
find that much of the benefit of OOO dynamism can be
achieved by the potentially simpler task of addressing
these two behaviors directly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Checkoway:2013:IAW,
author = "Stephen Checkoway and Hovav Shacham",
title = "{Iago} attacks: why the system call {API} is a bad
untrusted {RPC} interface",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "253--264",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451145",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In recent years, researchers have proposed systems for
running trusted code on an untrusted operating system.
Protection mechanisms deployed by such systems keep a
malicious kernel from directly manipulating a trusted
application's state. Under such systems, the
application and kernel are, conceptually, peers, and
the system call API defines an RPC interface between
them. We introduce Iago attacks, attacks that a
malicious kernel can mount in this model. We show how a
carefully chosen sequence of integer return values to
Linux system calls can lead a supposedly protected
process to act against its interests, and even to
undertake arbitrary computation at the malicious
kernel's behest. Iago attacks are evidence that
protecting applications from malicious kernels is more
difficult than previously realized.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Hofmann:2013:ISA,
author = "Owen S. Hofmann and Sangman Kim and Alan M. Dunn and
Michael Z. Lee and Emmett Witchel",
title = "{InkTag}: secure applications on an untrusted
operating system",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "265--278",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451146",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "InkTag is a virtualization-based architecture that
gives strong safety guarantees to high-assurance
processes even in the presence of a malicious operating
system. InkTag advances the state of the art in
untrusted operating systems in both the design of its
hypervisor and in the ability to run useful
applications without trusting the operating system. We
introduce paraverification, a technique that simplifies
the InkTag hypervisor by forcing the untrusted
operating system to participate in its own
verification. Attribute-based access control allows
trusted applications to create decentralized access
control policies. InkTag is also the first system of
its kind to ensure consistency between secure data and
metadata, ensuring recoverability in the face of system
crashes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Giuffrida:2013:SAL,
author = "Cristiano Giuffrida and Anton Kuijsten and Andrew S.
Tanenbaum",
title = "Safe and automatic live update for operating systems",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "279--292",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451147",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Increasingly many systems have to run all the time
with no downtime allowed. Consider, for example,
systems controlling electric power plants and e-banking
servers. Nevertheless, security patches and a constant
stream of new operating system versions need to be
deployed without stopping running programs. These
factors naturally lead to a pressing demand for live
update---upgrading all or parts of the operating system
without rebooting. Unfortunately, existing solutions
require significant manual intervention and thus work
reliably only for small operating system patches. In
this paper, we describe an automated system for live
update that can safely and automatically handle major
upgrades without rebooting. We have implemented our
ideas in Proteos, a new research OS designed with live
update in mind. Proteos relies on system support and
nonintrusive instrumentation to handle even very
complex updates with minimal manual effort. The key
novelty is the idea of state quiescence, which allows
updates to happen only in safe and predictable system
states. A second novelty is the ability to
automatically perform transactional live updates at the
process level, ensuring a safe and stable update
process. Unlike prior solutions, Proteos supports
automated state transfer, state checking, and hot
rollback. We have evaluated Proteos on 50 real updates
and on novel live update scenarios. The results show
that our techniques can effectively support both simple
and complex updates, while outperforming prior
solutions in terms of flexibility, security,
reliability, and stability of the update process.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Mai:2013:VSI,
author = "Haohui Mai and Edgar Pek and Hui Xue and Samuel
Talmadge King and Parthasarathy Madhusudan",
title = "Verifying security invariants in {ExpressOS}",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "293--304",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451148",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Security for applications running on mobile devices is
important. In this paper we present ExpressOS, a new OS
for enabling high-assurance applications to run on
commodity mobile devices securely. Our main
contributions are a new OS architecture and our use of
formal methods for proving key security invariants
about our implementation. In our use of formal methods,
we focus solely on proving that our OS implements our
security invariants correctly, rather than striving for
full functional correctness, which requires
significantly less verification effort while still
proving the security-relevant aspects of our system. We built
ExpressOS, analyzed its security, and tested its
performance. Our evaluation shows that the performance
of ExpressOS is comparable to an Android-based system.
In one test, we ran the same web browser on ExpressOS
and on an Android-based system, and found that
ExpressOS adds 16\% overhead on average to the page
load latency time for nine popular web sites.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Schkufza:2013:SS,
author = "Eric Schkufza and Rahul Sharma and Alex Aiken",
title = "Stochastic superoptimization",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "305--316",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451150",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We formulate the loop-free binary superoptimization
task as a stochastic search problem. The competing
constraints of transformation correctness and
performance improvement are encoded as terms in a cost
function, and a Markov Chain Monte Carlo sampler is
used to rapidly explore the space of all possible
programs to find one that is an optimization of a given
target program. Although our method sacrifices
completeness, the scope of programs we are able to
consider, and the resulting quality of the programs
that we produce, far exceed those of existing
superoptimizers. Beginning from binaries compiled by
llvm -O0 for 64-bit x86, our prototype implementation,
STOKE, is able to produce programs which either match
or outperform the code produced by gcc -O3, icc -O3,
and in some cases, expert handwritten assembly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
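The Schkufza:2013:SS abstract describes encoding correctness and
performance as terms in a cost function explored with Markov Chain Monte
Carlo. The Python sketch below is a drastically simplified stand-in for
STOKE: a toy accumulator machine and a hand-picked target function replace
x86-64 and test-case execution. It shows only the core loop: propose a
random single-instruction mutation and accept it with the Metropolis
probability min(1, exp(-beta * delta_cost)).

import math, random

TESTS = [(x, 2 * x + 3) for x in range(-4, 5)]   # target behavior: f(x) = 2x + 3
# Toy instruction set over one accumulator: nop, add k, mul k (illustrative).
OPS = [("nop", 0)] + [(op, k) for op in ("add", "mul") for k in range(-3, 4)]
LENGTH, BETA = 4, 1.0

def execute(prog, x):
    for op, k in prog:
        if op == "add":
            x += k
        elif op == "mul":
            x *= k
    return x

def cost(prog):
    wrong = sum(abs(execute(prog, x) - y) for x, y in TESTS)  # correctness term
    size = sum(1 for op, _ in prog if op != "nop")            # performance term
    return wrong + 0.1 * size

random.seed(0)
current = [random.choice(OPS) for _ in range(LENGTH)]
best = current[:]
for _ in range(100000):
    candidate = current[:]
    candidate[random.randrange(LENGTH)] = random.choice(OPS)  # random mutation
    delta = cost(candidate) - cost(current)
    if delta <= 0 or random.random() < math.exp(-BETA * delta):
        current = candidate                                   # Metropolis accept
        if cost(current) < cost(best):
            best = current[:]
print(best, "cost:", cost(best))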
@Article{Schulte:2013:ARB,
author = "Eric Schulte and Jonathan DiLorenzo and Westley Weimer
and Stephanie Forrest",
title = "Automated repair of binary and assembly programs for
cooperating embedded devices",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "317--328",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451151",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a method for automatically repairing
arbitrary software defects in embedded systems, which
have limited memory, disk and CPU capacities, but exist
in great numbers. We extend evolutionary computation
(EC) algorithms that search for valid repairs at the
source code level to assembly and ELF format binaries,
compensating for limited system resources with several
algorithmic innovations. Our method requires neither
access to the source code or build toolchain of the
software under repair, nor program instrumentation,
specialized execution environments, virtual machines,
or prior knowledge of the bug type. We repair defects
in ARM and x86 assembly as well as ELF binaries,
observing decreases of 86\% in memory and 95\% in disk
requirements, and a 62\% decrease in repair time,
compared to similar source-level techniques.
These advances allow repairs previously possible only
with C source code to be applied to any ARM or x86
assembly or ELF executable. Efficiency gains are
achieved by introducing stochastic fault localization,
with much lower overhead than comparable deterministic
methods, and low-level program representations. When
distributed over multiple devices, our algorithm finds
repairs faster than predicted by naive parallelism.
Four devices using our approach are five times more
efficient than a single device because of our
collaboration model. The algorithm is implemented on
Nokia N900 smartphones, with inter-phone communication
fitting in 900 bytes sent in 7 SMS text messages per
device per repair on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Cui:2013:VSR,
author = "Heming Cui and Gang Hu and Jingyue Wu and Junfeng
Yang",
title = "Verifying systems rules using rule-directed symbolic
execution",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "329--342",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451152",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Systems code must obey many rules, such as ``opened
files must be closed.'' One approach to verifying rules
is static analysis, but this technique cannot infer
precise runtime effects of code, often emitting many
false positives. An alternative is symbolic execution,
a technique that verifies program paths over all inputs
up to a bounded size. However, when applied to verify
rules, existing symbolic execution systems often
blindly explore many redundant program paths while
missing relevant ones that may contain bugs. Our key
insight is that only a small portion of paths are
relevant to rules, and the rest (the majority) are
irrelevant and do not need to be verified. Based on
this insight, we create WOODPECKER, a new symbolic
execution system for effectively checking rules on
systems programs. It provides a set of builtin checkers
for common rules, and an interface for users to easily
check new rules. It directs symbolic execution toward
the program paths relevant to a checked rule, and
soundly prunes redundant paths, exponentially speeding
up symbolic execution. It is designed to be
heuristic-agnostic, enabling users to leverage existing
powerful search heuristics. Evaluation on 136 systems
programs totaling 545K lines of code, including some of
the most widely used programs, shows that, with a time
limit of typically just one hour for each verification
run, WOODPECKER effectively verifies 28.7\% of the
program and rule combinations over bounded input,
whereas an existing symbolic execution system KLEE
verifies only 8.5\%. For the remaining combinations,
WOODPECKER verifies 4.6 times as many relevant paths as
KLEE. With a longer time limit, WOODPECKER verifies
many more paths than KLEE, e.g., 17 times as many with
a four-hour limit. WOODPECKER detects 113 rule
violations, including 10 serious data loss errors, with
the 2 most serious ones already confirmed by the
corresponding developers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Xiang:2013:HHO,
author = "Xiaoya Xiang and Chen Ding and Hao Luo and Bin Bao",
title = "{HOTL}: a higher order theory of locality",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "343--356",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451153",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
 abstract =     "There are many locality metrics: for example, miss
                 ratio to test performance, data footprint to manage
                 cache sharing, and reuse distance to analyze and
                 optimize a program. It is unclear how different metrics are
related, whether one subsumes another, and what
combination may represent locality completely. This
paper first derives a set of formulas to convert
between five locality metrics and gives the condition
for correctness. The transformation is analogous to
differentiation and integration used to convert between
higher order polynomials. As a result, these metrics
can be assigned an order and organized into a
hierarchy. Using the new theory, the paper then
develops two techniques: one measures the locality in
real time without special hardware support, and the
other predicts multicore cache interference without
parallel testing. The paper evaluates them using
sequential and parallel programs as well as a parallel
mix of sequential programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
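The lowest-order conversion related to the Xiang:2013:HHO hierarchy is the
classic one from reuse (LRU stack) distance to the miss ratio of every
fully associative LRU cache size. The Python sketch below implements that
textbook conversion on a synthetic cyclic trace; the trace and the naive
O(nm) stack are illustrative, and the paper's actual contributions
(relating this metric to footprint and others, and measuring locality in
real time) are not reproduced here.

def stack_distances(trace):
    """LRU stack distance per access: number of distinct other blocks
    touched since the previous access to the same block (inf = cold miss)."""
    stack, dists = [], []
    for block in trace:
        if block in stack:
            i = stack.index(block)
            dists.append(i)
            stack.pop(i)
        else:
            dists.append(float("inf"))
        stack.insert(0, block)          # most recently used at the front
    return dists

def miss_ratio(dists, cache_size):
    # An access hits in an LRU cache of size c iff its stack distance < c.
    return sum(1 for d in dists if d >= cache_size) / len(dists)

trace = list(range(8)) * 4              # cyclically sweep 8 blocks (synthetic)
dists = stack_distances(trace)
for c in (2, 4, 8, 16):
    print("cache size %2d -> miss ratio %.2f" % (c, miss_ratio(dists, c)))

The sharp cliff at the working-set size is the familiar LRU thrashing
behavior; the paper's higher-order metrics are obtained from such curves by
the differentiation- and integration-like transformations the abstract
mentions.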
@Article{Kang:2013:HPP,
author = "Hui Kang and Jennifer L. Wong",
title = "To hardware prefetch or not to prefetch?: a
virtualized environment study and core binding
approach",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "357--368",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451155",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most hardware and software vendors suggest disabling
hardware prefetching in virtualized environments. They
claim that prefetching is detrimental to application
performance due to inaccurate prediction caused by
workload diversity and VM interference on shared cache.
However, no comprehensive or quantitative measurements
to support this belief have been performed. This paper
is the first to systematically measure the influence of
hardware prefetching in virtualized environments. We
examine a wide variety of benchmarks on three types of
chip-multiprocessors (CMPs) to analyze the hardware
prefetching performance. We conduct extensive
experiments by taking into account a number of
important virtualization factors. We find that hardware
prefetching has minimal destructive influence under
most configurations. Only with certain application
combinations does prefetching influence the overall
performance. To leverage these findings and make
hardware prefetching effective across a diversity of
virtualized environments, we propose a dynamic
prefetching-aware VCPU-core binding approach (PAVCB),
which includes two phases --- classifying and binding.
The workload of each VM is classified into different
cache sharing constraint categories based upon its
cache access characteristics, considering both prefetch
requests and demand requests. Then following heuristic
rules, the VCPUs of each VM are scheduled onto
appropriate cores subject to cache sharing constraints.
We show that the proposed approach can improve
performance by 12\% on average over the default
scheduler and 46\% over manual system administrator
bindings across different workload combinations in the
presence of hardware prefetching.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Kim:2013:DBC,
author = "Hwanju Kim and Sangwook Kim and Jinkyu Jeong and
Joonwon Lee and Seungryoul Maeng",
title = "Demand-based coordinated scheduling for {SMP VMs}",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "369--380",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451156",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As processor architectures have been enhancing their
computing capacity by increasing core counts,
independent workloads can be consolidated on a single
node for the sake of high resource efficiency in data
centers. With the prevalence of virtualization
technology, each individual workload can be hosted on a
virtual machine for strong isolation between co-located
workloads. Along with this trend, hosted applications
have increasingly been multithreaded to take advantage
of improved hardware parallelism. Although the
performance of many multithreaded applications highly
depends on communication (or synchronization) latency,
existing schemes of virtual machine scheduling do not
explicitly coordinate virtual CPUs based on their
communication behaviors. This paper presents a
demand-based coordinated scheduling scheme for
consolidated virtual machines that host multithreaded
workloads. To this end, we propose communication-driven
scheduling that controls time-sharing in response to
inter-processor interrupts (IPIs) between virtual CPUs.
On the basis of an in-depth analysis of the relationship
between IPI communications and coordination demands, we
devise IPI-driven coscheduling and delayed preemption
schemes, which effectively reduce synchronization
latency and unnecessary CPU consumption. In addition,
we introduce a load-conscious CPU allocation policy in
order to address load imbalance in heterogeneously
consolidated environments. The proposed schemes are
evaluated with respect to various scenarios of mixed
workloads using the PARSEC multithreaded applications.
In the evaluation, our scheme improves the overall
performance of consolidated workloads, especially
communication-intensive applications, by reducing
inefficient synchronization latency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Dashti:2013:TMH,
author = "Mohammad Dashti and Alexandra Fedorova and Justin
Funston and Fabien Gaud and Renaud Lachaize and
Baptiste Lepers and Vivien Quema and Mark Roth",
title = "Traffic management: a holistic approach to memory
placement on {NUMA} systems",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "381--394",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451157",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "NUMA systems are characterized by Non-Uniform Memory
Access times, where accessing data in a remote node
takes longer than a local access. NUMA hardware has
been built since the late 80's, and the operating
systems designed for it were optimized for access
locality. They co-located memory pages with the threads
that accessed them, so as to avoid the cost of remote
accesses. Contrary to older systems, modern NUMA
hardware has much smaller remote wire delays, and so
remote access costs per se are not the main concern for
performance, as we discovered in this work. Instead,
congestion on memory controllers and interconnects,
caused by memory traffic from data-intensive
applications, hurts performance a lot more. Because of
that, memory placement algorithms must be redesigned to
target traffic congestion. This requires an arsenal of
techniques that go beyond optimizing locality. In this
paper we describe Carrefour, an algorithm that
addresses this goal. We implemented Carrefour in Linux
and obtained performance improvements of up to 3.6x
relative to the default kernel, as well as significant
improvements compared to NUMA-aware patch sets
available for Linux. Carrefour never hurts performance
by more than 4\% when memory placement cannot be
improved. We present the design of Carrefour, the
challenges of implementing it on modern hardware, and
draw insights about hardware support that would help
optimize system software on future NUMA systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Jog:2013:OCT,
author = "Adwait Jog and Onur Kayiran and Nachiappan Chidambaram
Nachiappan and Asit K. Mishra and Mahmut T. Kandemir
and Onur Mutlu and Ravishankar Iyer and Chita R. Das",
title = "{OWL}: cooperative thread array aware scheduling
techniques for improving {GPGPU} performance",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "395--406",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451158",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging GPGPU architectures, along with programming
models like CUDA and OpenCL, offer a cost-effective
platform for many applications by providing high thread
level parallelism at lower energy budgets.
Unfortunately, for many general-purpose applications,
available hardware resources of a GPGPU are not
efficiently utilized, leading to lost opportunity in
improving performance. A major cause of this is the
inefficiency of current warp scheduling policies in
tolerating long memory latencies. In this paper, we
identify that the scheduling decisions made by such
policies are agnostic to thread-block, or cooperative
thread array (CTA), behavior, and are as a result
inefficient. We present a coordinated CTA-aware
scheduling policy that utilizes four schemes to
minimize the impact of long memory latencies. The first
two schemes, CTA-aware two-level warp scheduling and
locality aware warp scheduling, enhance per-core
performance by effectively reducing cache contention
and improving latency hiding capability. The third
scheme, bank-level parallelism aware warp scheduling,
improves overall GPGPU performance by enhancing DRAM
bank-level parallelism. The fourth scheme employs
opportunistic memory-side prefetching to further
enhance performance by taking advantage of open DRAM
rows. Evaluations on a 28-core GPGPU platform with
highly memory-intensive applications indicate that our
proposed mechanism can provide 33\% average performance
improvement compared to the commonly-employed
round-robin warp scheduling policy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Pai:2013:IGC,
author = "Sreepathi Pai and Matthew J. Thazhuthaveetil and R.
Govindarajan",
title = "Improving {GPGPU} concurrency with elastic kernels",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "407--418",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451160",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Each new generation of GPUs vastly increases the
resources available to GPGPU programs. GPU programming
models (like CUDA) were designed to scale to use these
resources. However, we find that CUDA programs actually
do not scale to utilize all available resources, with
over 30\% of resources going unused on average for
programs of the Parboil2 suite that we used in our
work. Current GPUs therefore allow concurrent execution
of kernels to improve utilization. In this work, we
study concurrent execution of GPU kernels using
multiprogram workloads on current NVIDIA Fermi GPUs. On
two-program workloads from the Parboil2 benchmark suite
we find concurrent execution is often no better than
serialized execution. We identify that the lack of
control over resource allocation to kernels is a major
serialization bottleneck. We propose transformations
that convert CUDA kernels into elastic kernels which
permit fine-grained control over their resource usage.
We then propose several elastic-kernel aware
concurrency policies that offer significantly better
performance and concurrency compared to the current
CUDA policy. We evaluate our proposals on real hardware
using multiprogrammed workloads constructed from
benchmarks in the Parboil2 suite. On average, our
proposals increase system throughput (STP) by 1.21x and
improve the average normalized turnaround time (ANTT)
by 3.73x for two-program workloads when compared to the
current CUDA concurrency implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Oh:2013:PAL,
author = "Taewook Oh and Hanjun Kim and Nick P. Johnson and Jae
W. Lee and David I. August",
title = "Practical automatic loop specialization",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "419--430",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451161",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Program specialization optimizes a program with
respect to program invariants, including known, fixed
inputs. These invariants can be used to enable
optimizations that are otherwise unsound. In many
applications, a program input induces predictable
patterns of values across loop iterations, yet existing
specializers cannot fully capitalize on this
opportunity. To address this limitation, we present
Invariant-induced Pattern based Loop Specialization
(IPLS), the first fully-automatic specialization
technique designed for everyday use on real
applications. Using dynamic information-flow tracking,
IPLS profiles the values of instructions that depend
solely on invariants and recognizes repeating patterns
across multiple iterations of hot loops. IPLS then
specializes these loops, using those patterns to
predict values across a large window of loop
iterations. This enables aggressive optimization of the
loop; conceptually, this optimization reconstructs
recurring patterns induced by the input as concrete
loops in the specialized binary. IPLS specializes
real-world programs that prior techniques fail to
specialize without requiring hints from the user.
Experiments demonstrate a geomean speedup of 14.1\%
with a maximum speedup of 138\% over the original codes
when evaluated on three script interpreters and eleven
scripts each.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Phothilimthana:2013:PPH,
author = "Phitchaya Mangpo Phothilimthana and Jason Ansel and
Jonathan Ragan-Kelley and Saman Amarasinghe",
title = "Portable performance on heterogeneous architectures",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "431--444",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451162",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Trends in both consumer and high performance computing
are bringing not only more cores, but also increased
heterogeneity among the computational resources within
a single machine. In many machines, one of the greatest
computational resources is now their graphics
coprocessors (GPUs), not just their primary CPUs. But
GPU programming and memory models differ dramatically
from conventional CPUs, and the relative performance
characteristics of the different processors vary widely
between machines. Different processors within a system
often perform best with different algorithms and memory
usage patterns, and achieving the best overall
performance may require mapping portions of programs
across all types of resources in the machine. To
address the problem of efficiently programming machines
with increasingly heterogeneous computational
resources, we propose a programming model in which the
best mapping of programs to processors and memories is
determined empirically. Programs define choices in how
their individual algorithms may work, and the compiler
generates further choices in how they can map to CPU
and GPU processors and memory systems. These choices
are given to an empirical autotuning framework that
allows the space of possible implementations to be
searched at installation time. The rich choice space
allows the autotuner to construct poly-algorithms that
combine many different algorithmic techniques, using
both the CPU and the GPU, to obtain better performance
than any one technique alone. Experimental results show
that algorithmic changes, and the varied use of both
CPUs and GPUs, are necessary to obtain up to a 16.5x
speedup over using a single program configuration for
all architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Mittal:2013:EVE,
author = "Aashish Mittal and Dushyant Bansal and Sorav Bansal
and Varun Sethi",
title = "Efficient virtualization on embedded {Power
Architecture\reg} platforms",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "445--458",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451163",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Power Architecture\reg{} processors are popular and
widespread on embedded systems, and such platforms are
increasingly being used to run virtual machines. While
the Power Architecture meets the Popek-and-Goldberg
virtualization requirements for traditional
trap-and-emulate style virtualization, the performance
overhead of virtualization remains high. For example,
workloads exhibiting a large amount of kernel activity
typically show 3-5x slowdowns over bare-metal. Recent
additions to the Linux kernel contain guest and host
side paravirtual extensions for Power Architecture
platforms. While these extensions improve performance
significantly, they are guest-specific,
guest-intrusive, and cover only a subset of all
possible virtualization optimizations. We present a set
of host-side optimizations that achieve comparable
performance to the aforementioned paravirtual
extensions, on an unmodified guest. Our optimizations
are based on adaptive in-place binary translation.
Unlike the paravirtual approach, our solution is guest
neutral. We implement our ideas in a prototype based on
Qemu/KVM. After our modifications, KVM can boot an
unmodified Linux guest around 2.5x faster. We contrast
our optimization approach with previous similar binary
translation based approaches for the x86 architecture;
in our experience, each architecture presents a unique
set of challenges and optimization opportunities.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Hill:2013:RDC,
author = "Mark D. Hill",
title = "Research directions for {21st Century} computer
systems: {ASPLOS 2013} panel",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "459--460",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451165",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Four recent efforts call out architectural challenges
and opportunities up and down the software/hardware
stack. This panel will discuss, ``What should the
community do to facilitate, transcend, or refute these
partially overlapping visions?'' The panel is chaired
by Mark D. Hill with other panel members not finalized
for the ASPLOS'13 proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Madhavapeddy:2013:ULO,
author = "Anil Madhavapeddy and Richard Mortier and Charalampos
Rotsos and David Scott and Balraj Singh and Thomas
Gazagnaire and Steven Smith and Steven Hand and Jon
Crowcroft",
title = "Unikernels: library operating systems for the cloud",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "461--472",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451167",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present unikernels, a new approach to deploying
cloud services via applications written in high-level
source code. Unikernels are single-purpose appliances
that are compile-time specialised into standalone
kernels, and sealed against modification when deployed
to a cloud platform. In return they offer significant
reduction in image sizes, improved efficiency and
security, and should reduce operational costs. Our
Mirage prototype compiles OCaml code into unikernels
that run on commodity clouds and offer an order of
magnitude reduction in code size without significant
performance penalty. The architecture combines static
type-safety with a single address-space layout that can
be made immutable via a hypervisor extension. Mirage
contributes a suite of type-safe protocol libraries,
and our results demonstrate that the hypervisor is a
platform that overcomes the hardware compatibility
issues that have made past library operating systems
impractical to deploy in the real-world.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Kadav:2013:FGF,
author = "Asim Kadav and Matthew J. Renzelmann and Michael M.
Swift",
title = "Fine-grained fault tolerance using device
checkpoints",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "473--484",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451168",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recovering faults in drivers is difficult compared to
other code because their state is spread across both
memory and a device. Existing driver fault-tolerance
mechanisms either restart the driver and discard its
state, which can break applications, or require an
extensive logging mechanism to replay requests and
recreate driver state. Even logging may be
insufficient, though, if the semantics of requests are
ambiguous. In addition, these systems either require
large subsystems that must be kept up-to-date as the
kernel changes, or require substantial rewriting of
drivers. We present a new driver fault-tolerance
mechanism that provides fine-grained control over the
code protected. Fine-Grained Fault Tolerance (FGFT)
isolates driver code at the granularity of a single
entry point. It executes driver code as a transaction,
allowing roll back if the driver fails. We develop a
novel checkpointing mechanism to save and restore
device state using existing power management code.
Unlike past systems, FGFT can be incrementally deployed
in a single driver without the need for a large kernel
subsystem, but at the cost of small modifications to
the driver. In the evaluation, we show that FGFT can
have almost zero runtime cost in many cases, and that
checkpoint-based recovery can reduce the duration of a
failure by 79\% compared to restarting the driver.
Finally, we show that applying FGFT to a driver
requires little effort, and the majority of drivers in
common classes already contain the power-management
code needed for checkpoint/restore.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Silberstein:2013:GIF,
author = "Mark Silberstein and Bryan Ford and Idit Keidar and
Emmett Witchel",
title = "{GPUfs}: integrating a file system with {GPUs}",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "485--498",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451169",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
 abstract =     "GPU hardware is becoming increasingly general purpose,
quickly outgrowing the traditional but constrained
GPU-as-coprocessor programming model. To make GPUs
easier to program and easier to integrate with existing
systems, we propose making the host's file system
directly accessible from GPU code. GPUfs provides a
POSIX-like API for GPU programs, exploits GPU
parallelism for efficiency, and optimizes GPU file
access by extending the buffer cache into GPU memory.
Our experiments, based on a set of real benchmarks
adapted to use our file system, demonstrate the
feasibility and benefits of our approach. For example,
we demonstrate a simple self-contained GPU program
which searches for a set of strings in the entire tree
of Linux kernel source files over seven times faster
than an eight-core CPU run.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Hunt:2013:DTN,
author = "Nicholas Hunt and Tom Bergan and Luis Ceze and Steven
D. Gribble",
title = "{DDOS}: taming nondeterminism in distributed systems",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "499--508",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451170",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Nondeterminism complicates the development and
management of distributed systems, and arises from two
main sources: the local behavior of each individual
node as well as the behavior of the network connecting
them. Taming nondeterminism effectively requires
dealing with both sources. This paper proposes DDOS, a
system that leverages prior work on deterministic
multithreading to offer: (1) space-efficient
record/replay of distributed systems; and (2) fully
deterministic distributed behavior. Leveraging
deterministic behavior at each node makes outgoing
messages strictly a function of explicit inputs. This
allows us to record the system by logging just each
message's arrival time, not its contents. Going
further, we propose and implement an algorithm that
makes all communication between nodes deterministic by
scheduling communication onto a global logical
timeline. We implement both algorithms in a system
called DDOS and evaluate our system with parallel
scientific applications, an HTTP/memcached system and a
distributed microbenchmark with a high volume of
peer-to-peer communication. Our results show up to a
two-orders-of-magnitude reduction in record/replay log
size, and that distributed systems can be made
deterministic with an order-of-magnitude overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
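The Hunt:2013:DTN abstract sketches two ideas: with deterministic nodes,
recording only message arrival times suffices; and scheduling communication
onto a global logical timeline makes the communication itself
deterministic. The Python fragment below illustrates the second idea with a
classic Lamport-style total order, a generic technique chosen for
illustration and not necessarily DDOS's actual algorithm: messages are
delivered by (logical time, sender id), so every run observes the same
order regardless of physical arrival order.

import heapq

class Node:
    def __init__(self, node_id):
        self.node_id, self.clock, self.inbox = node_id, 0, []

    def send(self, dest, payload):
        self.clock += 1
        # (logical time, sender id) is a total order all nodes agree on.
        heapq.heappush(dest.inbox, (self.clock, self.node_id, payload))

    def deliver_all(self):
        delivered = []
        while self.inbox:
            lt, sender, payload = heapq.heappop(self.inbox)
            self.clock = max(self.clock, lt)   # Lamport clock update
            delivered.append((lt, sender, payload))
        return delivered

# Three toy nodes; delivery order at c is deterministic on every run.
a, b, c = Node(1), Node(2), Node(3)
a.send(c, "x"); b.send(c, "y"); a.send(c, "z")
print(c.deliver_all())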
@Article{Wang:2013:TEH,
author = "Cheng Wang and Youfeng Wu",
title = "{TSO\_ATOMICITY}: efficient hardware primitive for
{TSO}-preserving region optimizations",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "509--520",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451172",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Program optimizations based on data dependences may
not preserve the memory consistency in the programs.
Previous works leverage a hardware ATOMICITY primitive
to restrict the thread interleaving for preserving
sequential consistency in region optimizations.
However, the ATOMICITY primitive is overly restrictive
on thread interleaving when optimizing real-world
applications developed with the popular
Total-Store-Ordering (TSO) memory consistency, which is
weaker than sequential consistency. In this paper, we
present a novel hardware TSO\_ATOMICITY primitive,
which places less restriction on thread interleaving
than the ATOMICITY primitive, permitting more efficient
program execution while still preserving TSO memory
consistency in all region optimizations. Furthermore,
the TSO\_ATOMICITY primitive requires architecture
support similar to that of the ATOMICITY primitive and
can be implemented with only a slight change to the
existing ATOMICITY primitive implementation. Our
experimental results show that in a state-of-the-art
dynamic binary optimization system on a large set of
workloads, the ATOMICITY primitive can only improve
performance by 4\% on average, whereas the
TSO\_ATOMICITY primitive reduces the overhead
associated with the ATOMICITY primitive and improves
performance by 12\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Jafri:2013:WGI,
author = "Syed Ali Raza Jafri and Gwendolyn Voskuilen and T. N.
Vijaykumar",
title = "{Wait-n-GoTM}: improving {HTM} performance by
serializing cyclic dependencies",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "521--534",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451173",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactional memory (TM) has been proposed to
alleviate some key programmability problems in chip
multiprocessors. Most TMs optimistically allow
concurrent transactions, detecting read-write or
write-write conflicts. Upon conflicts, existing
hardware TMs (HTMs) use one of three
conflict-resolution policies: (1) always-abort, (2)
always-wait for some conflicting transactions to
complete, or (3) always-go past conflicts and resolve
acyclic conflicts at commit or abort upon cyclic
dependencies. While each policy has advantages, the
policies degrade performance under contention by
limiting concurrency (always-abort, always-wait) or
incurring late aborts due to cyclic dependencies
(always-go). Thus, while always-go avoids acyclic
aborts, no policy avoids cyclic aborts. We propose
Wait-n-GoTM (WnGTM) to increase concurrency while
avoiding cyclic aborts. We observe that most cyclic
dependencies are caused by threads interleaving
multiple accesses to a few heavily-read-write-shared
delinquent data cache blocks. These accesses occur in
code sections called cycle inducer sections (CISTs).
Accordingly, we propose Wait-n-Go (WnG)
conflict-resolution to avoid many cyclic aborts by
predicting and serializing the CISTs. To support the
WnG policy, we extend previous HTMs to (1) allow
multiple readers and writers, (2) scalably identify
dependencies, and (3) detect cyclic dependencies via
new mechanisms, namely, conflict transactional state,
order-capture, and hardware timestamps, respectively.
In 16-core simulations of STAMP, WnGTM achieves average
speedups of 46\% for higher-contention benchmarks and
28\% for all benchmarks over always-abort (TokenTM)
with low-contention benchmarks remaining unchanged,
compared to always-go (DATM) and always-wait
(LogTM-SE), which perform worse than and 6\% better
than TokenTM, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
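
The core of any go-past policy like WnG is detecting when a new
conflict edge would close a dependency cycle among in-flight
transactions. The sketch below (plain Python with invented names, not
the paper's mechanism; WnGTM does this in hardware via conflict
transactional state, order-capture, and timestamps) shows only that
bookkeeping: record one dependency edge per conflict and reject any
edge that would complete a cycle, which is the event that forces an
abort.

    # Toy model of dependency tracking in an optimistic TM.
    class CyclicDependencyError(Exception):
        pass

    class ConflictGraph:
        """Tracks serialization edges between active transactions."""
        def __init__(self):
            self.edges = {}  # txn -> set of txns it must commit after

        def _reaches(self, src, dst):
            # Depth-first search over recorded dependency edges.
            stack, seen = [src], set()
            while stack:
                node = stack.pop()
                if node == dst:
                    return True
                if node in seen:
                    continue
                seen.add(node)
                stack.extend(self.edges.get(node, ()))
            return False

        def add_conflict(self, earlier, later):
            """Record that `later` serializes after `earlier`."""
            if self._reaches(earlier, later):
                # earlier already depends on later: a cycle would form
                raise CyclicDependencyError((earlier, later))
            self.edges.setdefault(later, set()).add(earlier)

    g = ConflictGraph()
    g.add_conflict("T1", "T2")   # T2 must commit after T1
    g.add_conflict("T2", "T3")
    # g.add_conflict("T3", "T1") would raise CyclicDependencyError

Serializing a predicted CIST amounts to delaying its transaction so
such an edge can never form; the sketch shows only the detection half.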
@Article{Qian:2013:VSP,
author = "Xuehai Qian and Josep Torrellas and Benjamin Sahelices
and Depei Qian",
title = "{Volition}: scalable and precise sequential
consistency violation detection",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "535--548",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451174",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Sequential Consistency (SC) is the most intuitive
memory model, and SC Violations (SCVs) produce
unintuitive, typically incorrect executions. Most prior
SCV detection schemes have used data races as proxies
for SCVs, which is highly imprecise. Other schemes that
have targeted data-race cycles are either too
conservative or are designed only for two-processor
cycles and snoopy-based systems. This paper presents
Volition, the first hardware scheme that detects SCVs
in a relaxed-consistency machine precisely, in a
scalable manner, and for an arbitrary number of
processors in the cycle. Volition leverages cache
coherence protocol transactions to dynamically detect
cycles in memory-access orders across threads. When a
cycle is about to occur, an exception is triggered.
Volition can be used in both directory- and
snoopy-based coherence protocols. Our simulations of
Volition in a 64-processor multicore with
directory-based coherence running SPLASH-2 and Parsec
programs show that Volition induces negligible traffic
and execution overhead. In addition, it can detect SCVs
involving several processors. Volition is suitable for
on-the-fly use.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Grossman:2013:HSF,
author = "J. P. Grossman and Jeffrey S. Kuskin and Joseph A.
Bank and Michael Theobald and Ron O. Dror and Douglas
J. Ierardi and Richard H. Larson and U. Ben Schafer and
Brian Towles and Cliff Young and David E. Shaw",
title = "Hardware support for fine-grained event-driven
computation in {Anton 2}",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "549--560",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451175",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Exploiting parallelism to accelerate a computation
typically involves dividing it into many small tasks
that can be assigned to different processing elements.
An efficient execution schedule for these tasks can be
difficult or impossible to determine in advance,
however, if there is uncertainty as to when each task's
input data will be available. Ideally, each task would
run in direct response to the arrival of its input
data, thus allowing the computation to proceed in a
fine-grained event-driven manner. Realizing this ideal
is difficult in practice, and typically requires
sacrificing flexibility for performance. In Anton 2, a
massively parallel special-purpose supercomputer for
molecular dynamics simulations, we addressed this
challenge by including a hardware block, called the
dispatch unit, that provides flexible and efficient
support for fine-grained event-driven computation. Its
novel features include a many-to-many mapping from
input data to a set of synchronization counters, and
the ability to prioritize tasks based on their type. To
solve the additional problem of using a fixed set of
synchronization counters to track input data for a
potentially large number of tasks, we created a
software library that allows programmers to treat Anton
2 as an idealized machine with infinitely many
synchronization counters. The dispatch unit, together
with this library, made it possible to simplify our
molecular dynamics software by expressing it as a
collection of independent tasks, and the resulting
fine-grained execution schedule improved overall
performance by up to 16\% relative to a coarse-grained
schedule for precisely the same computation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '13 conference proceedings.",
}
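
As a rough software analogue of the dispatch unit's mechanism, the
sketch below (invented API, not Anton 2's hardware interface) maps
incoming data tags many-to-many onto per-task synchronization
counters and releases a task into a priority queue once its counter
reaches zero.

    import heapq

    class Dispatcher:
        def __init__(self):
            self.counters = {}   # task -> number of inputs still missing
            self.priority = {}   # task -> smaller value runs first
            self.body = {}       # task -> callable to run
            self.fanout = {}     # input tag -> tasks it decrements
            self.ready = []      # priority heap of runnable tasks

        def add_task(self, name, n_inputs, priority, body):
            self.counters[name] = n_inputs
            self.priority[name] = priority
            self.body[name] = body

        def wire(self, tag, *tasks):
            # many-to-many mapping from input data to counters
            self.fanout.setdefault(tag, []).extend(tasks)

        def deliver(self, tag):
            for t in self.fanout.get(tag, ()):
                self.counters[t] -= 1
                if self.counters[t] == 0:
                    heapq.heappush(self.ready, (self.priority[t], t))

        def run(self):
            while self.ready:
                _, t = heapq.heappop(self.ready)
                self.body[t]()

    d = Dispatcher()
    d.add_task("force", n_inputs=2, priority=0, body=lambda: print("force"))
    d.add_task("log",   n_inputs=1, priority=1, body=lambda: print("log"))
    d.wire("positions", "force", "log")
    d.wire("charges", "force")
    d.deliver("positions"); d.deliver("charges")
    d.run()   # runs "force" before "log" because of its higher priority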
@Article{Vitek:2013:SCR,
author = "Jan Vitek",
title = "{SIGPLAN Chair}'s report",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "1--2",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502510",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gibbons:2013:ASV,
author = "Jeremy Gibbons",
title = "{ACM SIGPLAN Vice-Chair}'s report",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "3--3",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502511",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Black:2013:SSR,
author = "Andrew P. Black",
title = "{SIGPLAN Secretary}'s report",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "4--5",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502512",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lopes:2013:STR,
author = "Cristina V. Lopes",
title = "{SIGPLAN Treasurer}'s report",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "6--6",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502513",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dreyer:2013:SMI,
author = "Derek Dreyer",
title = "{SIGPLAN} most influential paper awards",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "7--8",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502514",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lawall:2013:SPA,
author = "Julia Lawall and Cristina V. Lopes",
title = "{SIGPLAN Professional Activities Committee} report",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "9--9",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502515",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hind:2013:CRH,
author = "Michael Hind",
title = "{CACM} research highlights annual report",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "10--11",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502516",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dreyer:2013:PP,
author = "Derek Dreyer and John Field and Roberto Giacobazzi and
Michael Hicks and Suresh Jagannathan and Mooly Sagiv
and Peter Sewell and Phil Wadler",
title = "Principles of {POPL}",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "12--16",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502517",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Krishnamurthi:2013:AES,
author = "Shriram Krishnamurthi",
title = "Artifact evaluation for software conferences",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "17--21",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software and other digital artifacts are amongst the
most valuable contributions of computer science. Yet
our conferences treat these mostly as second-class
artifacts---especially conferences in the software
sciences, which ought to know better. This article
argues for elevating these other artifacts by making
them part of the evaluation process for papers, and
reports on experience from an iteration of an Artifact
Evaluation Committee for ESEC/FSE 2011.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Flanagan:2013:PES,
author = "Cormac Flanagan and K. Rustan M. Leino and Mark
Lillibridge and Greg Nelson and James B. Saxe and
Raymie Stata",
title = "{PLDI 2002}: Extended static checking for {Java}",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "22--33",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502520",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software development and maintenance are costly
endeavors. The cost can be reduced if more software
defects are detected earlier in the development cycle.
This paper introduces the Extended Static Checker for
Java (ESC/Java), an experimental compile-time program
checker that finds common programming errors. The
checker is powered by verification-condition generation
and automatic theorem-proving techniques. It provides
programmers with a simple annotation language with
which programmer design decisions can be expressed
formally. ESC/Java examines the annotated software and
warns of inconsistencies between the design decisions
recorded in the annotations and the actual code, and
also warns of potential runtime errors in the code.
This paper gives an overview of the checker
architecture and annotation language and describes our
experience applying the checker to tens of thousands of
lines of Java programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
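
The verification-condition machinery behind ESC-style checkers can be
illustrated with a toy weakest-precondition generator (a sketch only;
ESC/Java itself works on annotated Java and discharges the generated
conditions with an automatic theorem prover).

    import re

    def subst(pred, var, expr):
        """Substitute expr for whole-word occurrences of var in pred."""
        return re.sub(r'\b%s\b' % re.escape(var), '(%s)' % expr, pred)

    def wp(stmt, post):
        """Weakest precondition of a tiny statement AST w.r.t. post."""
        kind = stmt[0]
        if kind == 'assign':            # ('assign', var, expr)
            _, var, expr = stmt
            return subst(post, var, expr)
        if kind == 'seq':               # ('seq', s1, s2)
            _, s1, s2 = stmt
            return wp(s1, wp(s2, post))
        if kind == 'if':                # ('if', cond, s1, s2)
            _, cond, s1, s2 = stmt
            return ('(%s -> %s) and (not %s -> %s)'
                    % (cond, wp(s1, post), cond, wp(s2, post)))
        raise ValueError(kind)

    prog = ('seq', ('assign', 'y', 'x + 1'),
                   ('assign', 'x', 'y * 2'))
    print(wp(prog, 'x > 0'))   # prints ((x + 1) * 2) > 0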
@Article{Findler:2013:ICH,
author = "Robert Bruce Findler and Matthias Felleisen",
title = "{ICFP 2002}: Contracts for higher-order functions",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "34--45",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502521",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Assertions play an important role in the construction
of robust software. Their use in programming languages
dates back to the 1970s. Eiffel, an object-oriented
programming language, wholeheartedly adopted assertions
and developed the ``Design by Contract'' philosophy.
Indeed, the entire object-oriented community recognizes
the value of assertion-based contracts on methods. In
contrast, languages with higher-order functions do not
support assertion-based contracts. Because predicates
on functions are, in general, undecidable, specifying
such predicates appears to be meaningless. Instead, the
functional languages community developed type systems
that statically approximate interesting predicates. In
this paper, we show how to support higher-order
function contracts in a theoretically well-founded and
practically viable manner. Specifically, we introduce
{$\lambda^{CON}$}, a typed lambda calculus with assertions
for higher-order functions. The calculus models the
assertion monitoring system that we employ in DrScheme.
We establish basic properties of the model
(type soundness, etc.) and illustrate the usefulness of
contract checking with examples from DrScheme's code
base. We believe that the development of an assertion
system for higher-order functions serves two purposes.
On one hand, the system has strong practical potential
because existing type systems simply cannot express
many assertions that programmers would like to state.
On the other hand, an inspection of a large base of
invariants may provide inspiration for the direction of
practical future type system research.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
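
The paper's key move, wrapping function values so that domain
violations blame the caller while range violations blame the function
itself, can be sketched in a few lines of Python (simplified and with
invented names; DrScheme's monitor handles full blame tracking).

    class ContractViolation(Exception):
        pass

    def flat(pred):
        """First-order contract: check the predicate immediately."""
        def check(value, blame):
            if not pred(value):
                raise ContractViolation('blame %s: %r rejected' % (blame, value))
            return value
        return check

    def func(dom, rng):
        """Contract for one-argument functions: dom -> rng."""
        def check(f, blame):
            other = 'client' if blame == 'server' else 'server'
            def wrapped(x):
                x = dom(x, other)         # bad argument: blame the caller
                return rng(f(x), blame)   # bad result: blame the function
            return wrapped
        return check

    pos = flat(lambda n: isinstance(n, int) and n > 0)
    pos_to_pos = func(pos, pos)

    def f(n):
        return n - 2        # violates the range contract for n <= 2

    g = pos_to_pos(f, 'server')
    g(5)      # fine: 3 is positive
    # g(1)    would raise, blaming 'server' (f returned a non-positive)
    # g(-1)   would raise, blaming 'client' (bad argument supplied)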
@Article{Berger:2013:ORC,
author = "Emery D. Berger and Benjamin G. Zorn and Kathryn S.
McKinley",
title = "{OOPSLA 2002}: Reconsidering custom memory
allocation",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "46--57",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmers hoping to achieve performance improvements
often use custom memory allocators. This in-depth study
examines eight applications that use custom allocators.
Surprisingly, for six of these applications, a
state-of-the-art general-purpose allocator (the Lea
allocator) performs as well as or better than the
custom allocators. The two exceptions use regions,
which deliver higher performance (improvements of up to
44\%). Regions also reduce programmer burden and
eliminate a source of memory leaks. However, we show
that the inability of programmers to free individual
objects within regions can lead to a substantial
increase in memory consumption. Worse, this limitation
precludes the use of regions for common programming
idioms, reducing their usefulness. We present a
generalization of general-purpose and region-based
allocators that we call reaps. Reaps are a combination
of regions and heaps, providing a full range of region
semantics with the addition of individual object
deletion. We show that our implementation of reaps
provides high performance, outperforming other
allocators with region-like semantics. We then use a
case study to demonstrate the space advantages and
software engineering benefits of reaps in practice. Our
results indicate that programmers needing fast regions
should use reaps, and that most programmers considering
custom allocators should instead use the Lea
allocator.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
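
A reap's defining property, region-style bulk deletion combined with
individual object frees, is easy to see in a toy simulation (pure
Python over a bytearray; real reaps manage raw heap memory, and this
is not the paper's implementation).

    class Reap:
        """Bump-pointer region plus per-size free lists."""
        def __init__(self, size):
            self.buf = bytearray(size)
            self.top = 0
            self.free_lists = {}   # block size -> list of free offsets

        def alloc(self, n):
            fl = self.free_lists.get(n)
            if fl:                       # reuse an individually freed block
                return fl.pop()
            if self.top + n > len(self.buf):
                raise MemoryError
            off, self.top = self.top, self.top + n
            return off

        def free(self, off, n):
            # region semantics alone cannot do this; reaps can
            self.free_lists.setdefault(n, []).append(off)

        def free_all(self):
            # classic region teardown: O(1), everything released at once
            self.top = 0
            self.free_lists.clear()

    r = Reap(1024)
    a = r.alloc(16); b = r.alloc(16)
    r.free(a, 16)          # individual deletion, unlike a plain region
    c = r.alloc(16)        # reuses a's slot
    assert c == a
    r.free_all()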
@Article{Bacon:2013:PRT,
author = "David F. Bacon and Perry Cheng and V. T. Rajan",
title = "{POPL 2003}: a real-time garbage collector with low
overhead and consistent utilization",
journal = j-SIGPLAN,
volume = "48",
number = "4S",
pages = "58--71",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2502508.2502523",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 15 15:53:11 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Now that the use of garbage collection in languages
like Java is becoming widely accepted due to the safety
and software engineering benefits it provides, there is
significant interest in applying garbage collection to
hard real-time systems. Past approaches have generally
suffered from one of two major flaws: either they were
not provably real-time, or they imposed large space
overheads to meet the real-time bounds. We present a
mostly non-moving, dynamically defragmenting collector
that overcomes both of these limitations: by avoiding
copying in most cases, space requirements are kept low;
and by fully incrementalizing the collector we are able
to meet real-time bounds. We implemented our algorithm
in the Jikes RVM and show that at real-time resolution
we are able to obtain mutator utilization rates of 45\%
with only 1.6--2.5 times the actual space required by
the application, a factor of 4 improvement in
utilization over the best previously published results.
Defragmentation causes no more than 4\% of the traced
data to be copied.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
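
The paper's time-based scheduling and defragmentation are well beyond
a sketch, but the basic shape of incremental collection, marking under
a bounded work budget with a write barrier so the mutator can run
between steps, looks roughly like this (illustrative Python only, not
the paper's algorithm).

    class Heap:
        def __init__(self, edges, roots):
            self.edges = edges          # object id -> referenced ids
            self.roots = set(roots)
            self.black, self.gray = set(), list(roots)

        def gc_step(self, budget):
            """Mark at most `budget` objects; True when marking is done."""
            while self.gray and budget > 0:
                obj = self.gray.pop()
                if obj in self.black:
                    continue
                self.black.add(obj)
                budget -= 1
                for child in self.edges.get(obj, ()):
                    if child not in self.black:
                        self.gray.append(child)
            return not self.gray

        def write_barrier(self, src, new_target):
            # incremental-update barrier: a mutated edge is not missed
            self.edges.setdefault(src, []).append(new_target)
            if new_target not in self.black:
                self.gray.append(new_target)

        def sweep(self):
            live = self.black
            self.edges = {o: cs for o, cs in self.edges.items() if o in live}
            self.black, self.gray = set(), list(self.roots)
            return live

    h = Heap({1: [2], 2: [3], 3: [], 4: []}, roots=[1])
    while not h.gc_step(budget=2):
        pass                       # mutator work interleaves here
    assert h.sweep() == {1, 2, 3}  # object 4 is unreachable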
@Article{Wu:2013:HSC,
author = "Youfeng Wu",
title = "{HW\slash SW} co-designed acceleration of dynamic
languages",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "1--2",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465555",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic programming languages such as Java,
JavaScript, PHP, Perl, Python, and Ruby are the
dominant languages for programming the web. A HW/SW
co-designed virtual machine can significantly
accelerate their execution by transparently leveraging
internal HW features via an internal compiler. We also
argue for a common API to interface dynamic languages
with the HW/SW co-designed virtual machine, so that a
single internal compiler can accelerate all major
dynamic languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Khudia:2013:LCC,
author = "Daya Shanker Khudia and Scott Mahlke",
title = "Low cost control flow protection using abstract
control signatures",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "3--12",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465568",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The continual trend of shrinking feature sizes and
reducing voltage levels makes transistors faster and
more efficient. However, it also makes them more
susceptible to transient hardware faults. Transient
faults due to high energy particle strikes or circuit
crosstalk can corrupt the output of a program or cause
it to crash. Previous studies have reported that as
much as 70\% of the transient faults disturb program
control flow, making it critical to protect control
flow. Traditional approaches employ signatures to check
that every control flow transfer in a program is valid.
While such detailed checking has high fault coverage,
it introduces large performance overheads. We
propose a coarse-grain control flow checking method to
detect transient faults in a cost effective way. Our
software-only approach is centered on the principle of
abstraction: control flow that exhibits simple run-time
properties (e.g., proper path length) is almost always
completely correct. Our solution targets off-the-shelf
commodity embedded systems to provide a low cost
protection against transient faults. The proposed
technique achieves its efficiency by simplifying
signature calculations in each basic block and by
performing checking at a coarse-grain level. The
coarse-grain signature comparison points are obtained
by the use of a region based analysis. In addition, we
propose a technique to protect control flow transfers
via call and return instructions to ensure all control
flow is covered by our technique. Overall, our proposed
technique has an average of 11\% performance overhead
in comparison to 75\% performance overhead of
previously proposed signature based techniques while
maintaining approximately the same degree of fault
coverage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
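
The idea of checking only an abstract run-time property, such as
accumulated path length, at coarse-grained region exits can be
mimicked in a toy simulation (invented block names and signature
values; the paper derives the legal exit signatures statically per
region from the control-flow graph).

    # Legal accumulated signatures at this region's exit,
    # e.g. the then-path and else-path totals.
    LEGAL_EXIT_SIGNATURES = {3, 5}

    def run_region(path, fault_at=None):
        sig = 0
        for i, block in enumerate(path):
            if fault_at == i:
                block = 'corrupted'   # simulate a control-flow fault
            sig += {'entry': 1, 'then': 2, 'else': 4, 'corrupted': 99}[block]
        # One comparison per region, not per control-flow transfer.
        if sig not in LEGAL_EXIT_SIGNATURES:
            raise RuntimeError('control-flow error detected (sig=%d)' % sig)
        return sig

    run_region(['entry', 'then'])   # signature 3, passes
    run_region(['entry', 'else'])   # signature 5, passes
    # run_region(['entry', 'then'], fault_at=1) raises: 100 is illegal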
@Article{Chen:2013:BEF,
author = "Hao Chen and Chengmo Yang",
title = "Boosting efficiency of fault detection and recovery
through application-specific comparison and
checkpointing",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "13--20",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465562",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "While the unending technology scaling has brought
reliability to the forefront of concerns of the
semiconductor industry, fault tolerance techniques are
still rarely incorporated into existing designs due to
their high overhead. One fault tolerance scheme that
receives a lot of research attention is duplication and
checkpointing. However, most of the techniques in this
category employ a blind strategy to compare instruction
results, therefore not only generating large overhead
in buffering and verifying these values, but also
inducing unnecessary rollbacks to recover faults that
will never influence subsequent execution. To tackle
these issues, we introduce in this paper an approach
that identifies the minimum set of instruction results
for fault detection and checkpointing. For a given
application, the proposed technique first identifies
the control and data flow information of each execution
hotspot, and then selects only the instruction results
that either influence the final program results or are
needed during re-execution as the comparison set. Our
experimental studies demonstrate that the proposed
hotspot-targeting technique is able to reduce nearly
88\% of the comparison overhead and mask over 38\% of
all the injected faults while at the same time
delivering full fault coverage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Stilkerich:2013:JSE,
author = "Isabella Stilkerich and Michael Strotz and Christoph
Erhardt and Martin Hoffmann and Daniel Lohmann and
Fabian Scheler and Wolfgang Schr{\"o}der-Preikschat",
title = "A {JVM} for soft-error-prone embedded systems",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "21--32",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465571",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The reduction of structure sizes in microcontrollers,
environmental conditions or low supply voltages
increase the susceptibility of embedded systems to soft
errors. As a result, the employment of fault-detection
and fault-tolerance measures is becoming a mandatory
task even for moderately critical applications.
Accordingly, software-based techniques have recently
gained in popularity, and a multitude of approaches
that differ in the number and frequency of tolerated
errors as well as their associated overhead have been
proposed. Using type-safe programming languages to
isolate critical software components is very popular
among those techniques. An automated application of
fault-detection and fault-tolerance measures based on
the type system of the programming language and static
code analyses is possible. It facilitates an easy
evaluation of the protection characteristics and costs,
as well as the migration of software to new hardware
platforms with different failure rates. Transient
faults, however, are not bound to the application code
secured by the type system, but can also affect the
correctness of the type system itself. Thereby, the
type system might lose its ability to isolate critical
components. As a consequence, it is essential to also
protect the type system itself against soft errors. In
this paper, we show how soft errors can affect the
integrity of the type system. Furthermore, we provide
means to secure it against these faults, thus
preserving its isolating character. These measures can
be applied selectively to achieve a suitable tradeoff
between level of protection and resource consumption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Finlayson:2013:IPE,
author = "Ian Finlayson and Brandon Davis and Peter Gavin and
Gang-Ryung Uh and David Whalley and Magnus
Sj{\"a}lander and Gary Tyson",
title = "Improving processor efficiency by statically
pipelining instructions",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "33--44",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465559",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A new generation of applications requires reduced
power consumption without sacrificing performance.
Instruction pipelining is commonly used to meet
application performance requirements, but some
implementation aspects of pipelining are inefficient
with respect to energy usage. We propose static
pipelining as a new instruction set architecture to
enable more efficient instruction flow through the
pipeline, which is accomplished by exposing the
pipeline structure to the compiler. While this approach
simplifies hardware pipeline requirements, significant
modifications to the compiler are required. This paper
describes the code generation and compiler
optimizations we implemented to exploit the features of
this architecture. We show that we can achieve
performance and code size improvements despite a very
low-level instruction representation. We also
demonstrate that static pipelining of instructions
reduces energy usage by simplifying hardware, avoiding
many unnecessary operations, and allowing the compiler
to perform optimizations that are not possible on
traditional architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Porpodas:2013:LLA,
author = "Vasileios Porpodas and Marcelo Cintra",
title = "{LUCAS}: latency-adaptive unified cluster assignment
and instruction scheduling",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "45--54",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465565",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Clustered VLIW architectures are statically scheduled
wide-issue architectures that combine the advantages of
wide-issue processors along with the power and
frequency scalability of clustered designs. Being
statically scheduled, they require that the decision of
mapping instructions to clusters be done by the
compiler. State-of-the-art code generation for such
architectures combines cluster-assignment and
instruction scheduling in a single unified pass. The
performance of the generated code, however, is very
susceptible to the inter-cluster communication latency.
This is due to the nature of the two clustering
heuristics used. One is aggressive and works well for
low inter-cluster latencies, while the other is more
conservative and works well only for high latencies. In
this paper we propose LUCAS, a novel unified
cluster-assignment and instruction-scheduling algorithm
that adapts to the inter-cluster latency better than
the existing state-of-the-art schemes. LUCAS is a
hybrid scheme that performs fine-grain switching
between the two state-of-the-art clustering heuristics,
leading to better scheduling than either of them. It
generates better performing code for a wide range of
inter-cluster latency values.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Jang:2013:PSP,
author = "Hakbeom Jang and Channoh Kim and Jae W. Lee",
title = "Practical speculative parallelization of
variable-length decompression algorithms",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "55--64",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465557",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Variable-length coding is widely used for efficient
data compression. Typically, the compressor splits the
original data into blocks and compresses each block
with variable-length codes, hence producing
variable-length compressed blocks. Although the
compressor can easily exploit ample block-level
parallelism, it is much more difficult to extract such
coarse-grain parallelism from the decompressor because
a block boundary cannot be located until decompression
of the previous block is completed. This paper presents
novel algorithms to efficiently predict block
boundaries and a runtime system that enables efficient
block-level parallel decompression, called SDM. The SDM
execution model features speculative pipelining with
three stages: Scanner, Decompressor, and Merger. The
scanner stage employs a high-confidence prediction
algorithm that finds compressed block boundaries
without fully decompressing individual blocks. This
information is communicated to the parallel
decompressor stage in which multiple blocks are
decompressed in parallel. The decompressed blocks are
merged in order by the merger stage to produce the
final output. The SDM runtime is specialized to execute
this pipeline correctly and efficiently on
resource-constrained embedded platforms. With SDM we
effectively parallelize three production-grade
variable-length decompression algorithms --- zlib,
bzip2, and H.264 --- with maximum speedups of $ 2.50
\times $ and $ 8.53 \times $ (and geometric mean
speedups of $ 1.96 \times $ and $ 4.04 \times $ ) on
4-core and 36-core embedded platforms, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
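
Once block boundaries are known, the decompress-and-merge half of such
a pipeline is straightforward; the sketch below parallelizes
independently compressed zlib blocks and relies on the ordered map()
to play the merger's role. Boundary prediction, the hard part of SDM,
is assumed away here by keeping the boundaries explicit.

    import zlib
    from concurrent.futures import ThreadPoolExecutor

    def compress_blocks(data, block_size=1 << 16):
        # Each block is compressed independently, as the paper assumes.
        return [zlib.compress(data[i:i + block_size])
                for i in range(0, len(data), block_size)]

    def parallel_decompress(blocks, workers=4):
        # zlib releases the GIL, so threads decompress concurrently;
        # map() preserves input order, merging blocks back in sequence.
        with ThreadPoolExecutor(max_workers=workers) as pool:
            return b''.join(pool.map(zlib.decompress, blocks))

    original = b'abcdefgh' * 100000
    assert parallel_decompress(compress_blocks(original)) == original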
@Article{Chattopadhyay:2013:PPS,
author = "Sudipta Chattopadhyay and Lee Kee Chong and Abhik
Roychoudhury",
title = "Program performance spectrum",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "65--76",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465566",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Real-time and embedded applications often need to
satisfy several non-functional properties such as
timing. Consequently, performance validation is a
crucial stage before the deployment of real-time and
embedded software. Cache memories are often used to
bridge the performance gap between a processor and
memory subsystems. As a result, the analysis of caches
plays a key role in the performance validation of
real-time, embedded software. In this paper, we propose
a novel approach to compute the cache performance
signature of an entire program. Our technique is based
on exploring the input domain through different path
programs. Two paths belong to the same path program if
they follow the same set of control flow edges but may
vary in the iterations of loops encountered. Our
experiments with several subject programs show that the
different paths grouped into a path program have very
similar and often exactly the same cache performance. Our
path program exploration can be viewed as partitioning
the input domain of the program. Each partition is
associated with its cache performance and a symbolic
formula capturing the set of program inputs which
constitutes the partition. We show that such a
partitioning technique has wide spread usages in
performance prediction, testing, debugging and design
space exploration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Moreno:2013:NIP,
author = "Carlos Moreno and Sebastian Fischmeister and M. Anwar
Hasan",
title = "Non-intrusive program tracing and debugging of
deployed embedded systems through side-channel
analysis",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "77--88",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465570",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "One of the hardest aspects of embedded software
development is that of debugging, especially when
faulty behavior is observed at the production or
deployment stage. Non-intrusive observation of the
system's behavior is often insufficient to infer the
cause of the problem and identify and fix the bug. In
this work, we present a novel approach for
non-intrusive program tracing aimed at assisting
developers in the task of debugging embedded systems at
deployment or production stage, where standard
debugging tools are usually no longer available. The
technique is rooted in cryptography, in particular the
area of side-channel attacks. Our proposed technique
expands the scope of these cryptographic techniques so
that we recover the sequence of operations from power
consumption observations (power traces). To this end,
we use digital signal processing techniques (in
particular, spectral analysis) combined with pattern
recognition techniques to determine blocks of source
code being executed given the observed power trace. One
of the important highlights of our contribution is the
fact that the system works on a standard PC, capturing
the power traces through the recording input of the
sound card. Experimental results are presented and
confirm that the approach is viable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Beemster:2013:RCD,
author = "Marcel Beemster",
title = "The role of {C} in the dark ages of multi-core",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "89--90",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465556",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Contrary to predictions of its demise, C remains a
dominant programming language, especially in embedded
systems. Speed and transparency dictate that it will be
so for the next decade, despite its supposed
unsuitability for programming parallel architectures. A
flexible compiler development system is a unique
vehicle to bend the C language and its implementation
to the developers' will. Using hard-won experience in
applying extended versions of C to diverse parallel
architectures, C's potential in the dark ages of
multi-core programming is examined.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Wang:2013:FHF,
author = "Tianzheng Wang and Duo Liu and Yi Wang and Zili Shao",
title = "{FTL 2}: a hybrid {\em f\/}lash {\em t\/}ranslation
{\em l\/}ayer with logging for write reduction in flash
memory",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "91--100",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465563",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "NAND flash memory has been widely used to build
embedded devices such as smartphones and solid state
drives (SSD) because of its high performance, low power
consumption, great shock resistance and small form
factor. However, its lifetime and performance are
greatly constrained by partial page updates, which will
lead to early depletion of free pages and frequent
garbage collections. On the one hand, partial page
updates are prevalent as a large portion of I/O does
not modify file contents drastically. On the other
hand, general-purpose cache usually does not
specifically consider and eliminate duplicated
contents, despite its popularity. In this paper, we
propose a hybrid approach called FTL$^2$, which employs
both logging and mapping techniques in flash
translation layer (FTL), to tackle the endurance
problem and performance degradation caused by partial
page updates in flash memory. FTL$^2$ logs the latest
contents in a high-speed temporary storage, called
Content Cache to handle partial page updates.
Experimental results show that FTL$^2$ can greatly
reduce page writes and postpone garbage collections
with a small overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
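
The write-reduction idea, absorbing partial page updates in a log and
paying one page program only on merge, can be sketched as follows
(toy structures and invented names, not the paper's implementation; a
real FTL also tracks physical blocks, erase counts, and garbage
collection).

    PAGE = 4096

    class LoggingFTL:
        def __init__(self):
            self.pages = {}        # logical page no -> bytes on "flash"
            self.log = {}          # logical page no -> {offset: bytes}
            self.page_writes = 0   # full-page programs actually issued

        def write(self, lpn, offset, data):
            assert offset + len(data) <= PAGE
            if len(data) < PAGE:
                # partial update: log the delta, do not rewrite the page
                self.log.setdefault(lpn, {})[offset] = data
            else:
                self.pages[lpn] = bytes(data)
                self.log.pop(lpn, None)
                self.page_writes += 1

        def read(self, lpn):
            buf = bytearray(self.pages.get(lpn, b'\x00' * PAGE))
            for off, data in self.log.get(lpn, {}).items():
                buf[off:off + len(data)] = data
            return bytes(buf)

        def flush(self, lpn):
            # merge all logged deltas into a single page program
            self.pages[lpn] = self.read(lpn)
            self.log.pop(lpn, None)
            self.page_writes += 1

    ftl = LoggingFTL()
    ftl.write(0, 0, b'x' * PAGE)     # one full-page write
    ftl.write(0, 10, b'hello')       # absorbed by the log, no page write
    assert ftl.read(0)[10:15] == b'hello'
    assert ftl.page_writes == 1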
@Article{Li:2013:CDW,
author = "Qingan Li and Lei Jiang and Youtao Zhang and Yanxiang
He and Chun Jason Xue",
title = "Compiler directed write-mode selection for high
performance low power volatile {PCM}",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "101--110",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465564",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Micro-Controller Units (MCUs) are widely adopted
ubiquitous computing devices. Due to tight cost and
energy constraints, MCUs often integrate very limited
internal RAM memory on top of Flash storage, which
exposes Flash to heavy write traffic and results in
short system lifetime. Architecting emerging Phase
Change Memory (PCM) is a promising approach for MCUs
due to its fast read speed and long write endurance.
However, PCM, especially multi-level cell (MLC) PCM,
has long write latency and requires large write energy,
which diminishes the benefits of its replacement of
traditional Flash. By studying MLC PCM write
operations, we observe that writing MLC PCM can take
advantage of two write modes --- fast write leaves
cells in volatile state, and slow write leaves cells in
non-volatile state. In this paper, we propose a
compiler directed dual-write (CDDW) scheme that selects
the best write mode for each write operation to
maximize the overall performance and energy efficiency.
Our experimental results show that CDDW reduces dynamic
energy by 32.4\% (33.8\%) and improves performance by
6.3\% (35.9\%) compared with an all-fast (all-slow) write
approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Guan:2013:BBL,
author = "Yong Guan and Guohui Wang and Yi Wang and Renhai Chen
and Zili Shao",
title = "{BLog}: block-level log-block management for {NAND}
flash memory storage systems",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "111--120",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465560",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Log-block-based FTL (Flash Translation Layer) schemes
have been widely used to manage NAND flash memory
storage systems in industry. In log-block-based FTLs, a
few physical blocks called log blocks are used to hold
all page updates from a large number of data blocks.
Frequent page updates in log blocks introduce
significant overhead, so log blocks become the system
bottleneck. To
address this problem, this paper presents a block-level
log-block management scheme called BLog (Block-level
Log-Block Management). In BLog, with the block level
management, the update pages of a data block can be
collected together and put into the same log block as
much as possible; therefore, we can effectively reduce
the associativities of log blocks so as to reduce the
garbage collection overhead. We also propose a novel
partial merge operation called reduced-order merge by
which we can effectively postpone the garbage
collection of log blocks so as to maximally utilize
valid pages and reduce unnecessary erase operations in
log blocks. Based on BLog, we design an FTL called
BLogFTL for MLC NAND flash. We conduct experiments on a
mixture of real-world and synthetic traces. The
experimental results show that our scheme outperforms
the previous log-block-based FTLs for MLC NAND flash.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Mehiaoui:2013:TSO,
author = "Asma Mehiaoui and Ernest Wozniak and Sara
Tucci-Piergiovanni and Chokri Mraidha and Marco {Di
Natale} and Haibo Zeng and Jean-Philippe Babau and
Laurent Lemarchand and S{\'e}bastien Gerard",
title = "A two-step optimization technique for functions
placement, partitioning, and priority assignment in
distributed systems",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "121--132",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465572",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern development methodologies from the industry and
the academia for complex real-time systems define a
stage in which application functions are deployed onto
an execution platform. The deployment consists of the
placement of functions on a distributed network of
nodes, the partitioning of functions into tasks, and the
scheduling of tasks and messages. None of the existing
optimization techniques deal with the three stages of
the deployment problem at the same time. In this paper,
we present a staged approach towards the efficient
deployment of real-time functions based on genetic
algorithms and mixed integer linear programming
techniques. Application to case studies shows the
applicability of the method to industry-size systems
and the quality of the obtained solutions when compared
to the true optimum for small size examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Bouakaz:2013:BME,
author = "Adnan Bouakaz and Jean-Pierre Talpin",
title = "Buffer minimization in earliest-deadline first
scheduling of dataflow graphs",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "133--142",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465558",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Symbolic schedulability analysis of dataflow graphs is
the process of synthesizing the timing parameters (i.e.
periods, phases, and deadlines) of actors so that the
task system is schedulable and achieves a high
throughput when using a specific scheduling policy.
Furthermore, the resulting schedule must ensure that
communication buffers are underflow- and overflow-free.
This paper describes a (partitioned) earliest-deadline
first symbolic schedulability analysis of dataflow
graphs that minimizes the buffering requirements. Our
scheduling analysis consists of three major steps. (1)
The construction of an abstract affine schedule of the
graph that excludes overflow and underflow exceptions
and minimizes the buffering requirements assuming some
precedences between jobs. (2) Symbolic deadlines
adjustment that guarantees precedences without the need
for lock-based synchronizations. (3) The concretization
of the affine schedule using a symbolic,
fast-converging, processor-demand analysis for both
uniprocessor and multiprocessor systems. Experimental
results show that our technique improves the buffering
requirements in many cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Geuns:2013:ADM,
author = "Stefan J. Geuns and Joost P. H. M. Hausmans and Marco
J. G. Bekooij",
title = "Automatic dataflow model extraction from modal
real-time stream processing applications",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "143--152",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465561",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many real-time stream processing applications are
initially described as a sequential application
containing while-loops, which execute for an unknown
number of iterations. These modal applications have to
be executed in parallel on an MPSoC system in order to
meet their real-time throughput constraints. However,
no suitable approach exists that can automatically
derive a temporal analysis model from a sequential
specification containing while-loops with an unknown
number of iterations. This paper introduces an approach
to the automatic generation of a Structured
Variable-rate Phased Dataflow (SVPDF) model from a
sequential specification of a modal application. The
real-time requirements of an application can be
analyzed despite the presence of while-loops with an
unknown number of iterations. It is shown that an
algorithm that has a polynomial time computational
complexity can be applied on the generated SVPDF model
to determine whether a throughput constraint can be
met. The enabler for the automatic generation of an
SVPDF model is the decoupling of synchronization
between tasks that contain different while-loops. A
DVB-T radio transceiver illustrates the derivation of
the SVPDF model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Wang:2013:PMO,
author = "Cheng Wang and Sunita Chandrasekaran and Peng Sun and
Barbara Chapman and Jim Holt",
title = "Portable mapping of {OpenMP} to multicore embedded
systems using {MCA APIs}",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "153--162",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multicore embedded systems are being widely used in
telecommunication systems, robotics, medical
applications, and more. While they offer a
high-performance, low-power solution, programming them
efficiently is still a challenge. In order to
exploit the capabilities that the hardware offers,
software developers are expected to handle many of the
low-level details of programming including utilizing
DMA, ensuring cache coherency, and inserting
synchronization primitives explicitly. The
state-of-the-art involves solutions where the software
toolchain is too vendor-specific, thus tying the
software to particular hardware and leaving no room for
portability. In this paper we present a runtime system
to explore mapping a high-level programming model,
OpenMP, on to multicore embedded systems. A key feature
of our scheme is that unlike the existing approaches
that largely rely on POSIX threads, our approach
leverages the Multicore Association (MCA) APIs as an
OpenMP translation layer. The MCA APIs are a set of
low-level APIs handling resource management,
inter-process communications and task scheduling for
multicore embedded systems. By deploying the MCA APIs,
our runtime is able to effectively capture the
characteristics of multicore embedded systems compared
with POSIX threads. Furthermore, the MCA layer
enables our runtime implementation to be portable
across various architectures. Thus programmers only
need to maintain a single OpenMP code base which is
compatible with various compilers, while on the other
hand, the code is portable across different possible
types of platforms. We have evaluated our runtime
system using several embedded benchmarks. The
experiments demonstrate promising and competitive
performance compared to the native approach for the
platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Huber:2013:CWA,
author = "Benedikt Huber and Daniel Prokesch and Peter
Puschner",
title = "Combined {WCET} analysis of bitcode and machine code
using control-flow relation graphs",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "163--172",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465567",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Static program analyses like stack usage analysis and
worst-case execution time (WCET) analysis depend on the
actual machine code generated by the compiler for the
target system. As the analysis of binary code is
costly, hard to diagnose and platform dependent, it is
preferable to carry out parts of these analyses on a
higher-level program representation. To this end, the
higher-level code and the machine code need to be
related, a difficult task due to the complexity of
modern optimizing compilers. In this article, we
present a novel representation called control-flow
relation graphs, which provide an accurate model of the
control-flow relation between machine code and the
compiler's intermediate representation. In order to
facilitate the integration of our approach in existing
compiler frameworks, we develop a construction
algorithm that builds the control-flow relation graph
from partial mappings provided by the compiler. The
WCET calculation method for control-flow relation
graphs processes flow information from both the
intermediate representation and machine code.
Furthermore, we demonstrate the transformation of flow
information from the IR to the machine code level, in
order to use existing industrial-strength WCET analysis
tools operating on machine code. We implemented the
construction algorithm within the LLVM compiler
framework, along with an implementation of the combined
WCET calculation method. The evaluation demonstrates
that the approach is able to relate bitcode (LLVM's
intermediate representation) and machine code in a
precise way, with a WCET increase of at most 2\% when
using flow facts on the bitcode level, compared to
equivalent ones on the machine-code level. As the
methods presented in this article provide a
cost-effective way to reuse platform independent flow
information, they have the potential to simplify WCET
analysis, and popularize its use in the development
process of real-time systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '13 conference proceedings.",
}
@Article{Smaragdakis:2013:LYF,
author = "Yannis Smaragdakis",
title = "Look up!: your future is in the cloud",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "1--2",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462157",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The ``Cloud'' is a wonderfully expansive phrase used
to denote computation and data storage centralized in a
large datacenter and elastically accessed across a
network. The concept is not new; web sites and business
servers have run in datacenters for a long time. These,
however, were specialized applications, outside of the
mainstream of desktop programs. The past few years have
seen enormous change as the mainstream shifts from a
single computer to mobile devices and clusters of
computers. Three factors are driving this change. (1)
Mobile computing, where apps run on a size- and
power-constrained device and would be far less
interesting without backend systems to augment
computation and storage capacity. (2) Big data, which
uses clusters of computers to extract valuable
information from vast amounts of unstructured data. (3)
Inexpensive, elastic computing, pioneered by Amazon Web
Services, which enables everyone to rapidly obtain and
use many servers. As a researcher from the language and
compiler community, I firmly believe this sea change is
at heart a programming problem. Cloud computing is far
different from the environment in which most of today's
languages and tools were developed, and few programmers
have mastered its complexity. New challenges include
pervasive parallelism, partial failure, high and
variable communication latency, and replication for
reliability and throughput.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Cheung:2013:ODB,
author = "Alvin Cheung and Armando Solar-Lezama and Samuel
Madden",
title = "Optimizing database-backed applications with query
synthesis",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "3--14",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462180",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Object-relational mapping libraries are a popular way
for applications to interact with databases because
they provide transparent access to the database using
the same language as the application. Unfortunately,
using such frameworks often leads to poor performance,
as modularity concerns encourage developers to
implement relational operations in application code.
Such application code does not take advantage of the
optimized relational implementations that database
systems provide, such as efficient implementations of
joins or push down of selection predicates. In this
paper we present QBS, a system that automatically
transforms fragments of application logic into SQL
queries. QBS differs from traditional compiler
optimizations as it relies on synthesis technology to
generate invariants and postconditions for a code
fragment. The postconditions and invariants are
expressed using a new theory of ordered relations that
allows us to reason precisely about both the contents
and order of the records produced by complex code
fragments that compute joins and aggregates. The theory
is close in expressiveness to SQL, so the synthesized
postconditions can be readily translated to SQL
queries. Using 75 code fragments automatically
extracted from over 120k lines of open-source code
written using the Java Hibernate ORM, we demonstrate
that our approach can convert a variety of imperative
constructs into relational specifications and
improve application performance asymptotically, by
orders of magnitude.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
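
%%% The QBS entry above compiles imperative ORM-style application
%%% logic into SQL. As a hedged illustration only (not the authors'
%%% system, and with hypothetical table and column names), the
%%% minimal Python sketch below contrasts an application-level join
%%% and aggregate with the pushed-down query a QBS-style rewrite
%%% would emit.

import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE users (id INTEGER, name TEXT);
    CREATE TABLE orders (user_id INTEGER, total REAL);
    INSERT INTO users VALUES (1, 'ada'), (2, 'bob');
    INSERT INTO orders VALUES (1, 10.0), (1, 5.0), (2, 7.5);
""")

def totals_imperative():
    # Join and aggregate written in application code: the pattern
    # QBS detects in ORM clients.
    users = list(conn.execute("SELECT id, name FROM users"))
    orders = list(conn.execute("SELECT user_id, total FROM orders"))
    result = []
    for uid, name in users:
        subtotal = 0.0
        for order_uid, total in orders:
            if order_uid == uid:   # selection predicate in app code
                subtotal += total
        result.append((name, subtotal))
    return result

def totals_sql():
    # The single query an optimizer in the spirit of QBS would emit,
    # letting the database perform the join and aggregation.
    return list(conn.execute(
        "SELECT u.name, SUM(o.total) FROM users u "
        "JOIN orders o ON o.user_id = u.id "
        "GROUP BY u.id, u.name"))

assert sorted(totals_imperative()) == sorted(totals_sql())
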
@Article{Singh:2013:AFG,
author = "Rishabh Singh and Sumit Gulwani and Armando
Solar-Lezama",
title = "Automated feedback generation for introductory
programming assignments",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "15--26",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462195",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new method for automatically providing
feedback for introductory programming problems. In
order to use this method, we need a reference
implementation of the assignment, and an error model
consisting of potential corrections to errors that
students might make. Using this information, the system
automatically derives minimal corrections to student's
incorrect solutions, providing them with a measure of
exactly how incorrect a given solution was, as well as
feedback about what they did wrong. We introduce a
simple language for describing error models in terms of
correction rules, and formally define a rule-directed
translation strategy that reduces the problem of
finding minimal corrections in an incorrect program to
the problem of synthesizing a correct program from a
sketch. We have evaluated our system on thousands of
real student attempts obtained from the Introduction to
Programming course at MIT (6.00) and MITx (6.00x). Our
results show that relatively simple error models can
correct on average 64\% of all incorrect submissions in
our benchmark set.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Gvero:2013:CCU,
author = "Tihomir Gvero and Viktor Kuncak and Ivan Kuraj and
Ruzica Piskac",
title = "Complete completion using types and weights",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "27--38",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462192",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Developing modern software typically involves
composing functionality from existing libraries. This
task is difficult because libraries may expose many
methods to the developer. To help developers in such
scenarios, we present a technique that synthesizes and
suggests valid expressions of a given type at a given
program point. As the basis of our technique we use
type inhabitation for lambda calculus terms in long
normal form. We introduce a succinct representation for
type judgements that merges types into equivalence
classes to reduce the search space, then reconstructs
any desired number of solutions on demand. Furthermore,
we introduce a method to rank solutions based on
weights derived from a corpus of code. We implemented
the algorithm and deployed it as a plugin for the
Eclipse IDE for Scala. We show that the techniques we
incorporated greatly increase the effectiveness of the
approach. Our evaluation benchmarks are code examples
from programming practice; we make them available for
future comparisons.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
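
%%% The entry above ranks type-correct completions by weights mined
%%% from a code corpus. The toy Python sketch below is a loose
%%% reading of that idea, not the paper's algorithm over long normal
%%% forms: it enumerates only unary applications inhabiting a goal
%%% type and orders them by weight. All symbols in the example
%%% environment are invented.

def complete(goal_type, env, weights):
    # Collect every f(x) whose result type matches the goal, then
    # rank candidates by the corpus weight of the head symbol.
    candidates = []
    for f, (arg_t, ret_t) in env["funs"].items():
        if ret_t != goal_type:
            continue
        for x, t in env["vals"].items():
            if t == arg_t:
                candidates.append((weights.get(f, 1.0),
                                   f + "(" + x + ")"))
    return [expr for _, expr in sorted(candidates, reverse=True)]

env = {"funs": {"len": ("str", "int"), "ord": ("str", "int")},
       "vals": {"name": "str"}}
print(complete("int", env, {"len": 5.0, "ord": 1.0}))
# -> ['len(name)', 'ord(name)']
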
@Article{Johnson:2013:FCP,
author = "Nick P. Johnson and Taewook Oh and Ayal Zaks and David
I. August",
title = "Fast condensation of the program dependence graph",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "39--50",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2491960",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Aggressive compiler optimizations are formulated
around the Program Dependence Graph (PDG). Many
techniques, including loop fission and parallelization
are concerned primarily with dependence cycles in the
PDG. The Directed Acyclic Graph of Strongly Connected
Components (DAGSCC) represents these cycles directly.
The naive method to construct the DAGSCC first computes
the full PDG. This approach limits adoption of
aggressive optimizations because the number of analysis
queries grows quadratically with program size, making
DAGSCC construction expensive. Consequently, compilers
optimize small scopes with weaker but faster analyses.
We observe that many PDG edges do not affect the DAGSCC
and that ignoring them cannot affect clients of the
DAGSCC. Exploiting this insight, we present an
algorithm to omit those analysis queries to compute the
DAGSCC efficiently. Across 366 hot loops from 20
SPEC2006 benchmarks, this method computes the DAGSCC in
half of the time using half as many queries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
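
%%% The entry above concerns computing the DAG of strongly connected
%%% components (DAGSCC) of a program dependence graph. Below is a
%%% minimal Python sketch of that condensation using plain Tarjan's
%%% algorithm over an adjacency-set graph; it omits the paper's key
%%% contribution, the lazy elision of dependence queries, and the
%%% three-node example graph is invented.

import itertools

def condense(graph):
    # graph: {node: set of successor nodes}. Returns the component
    # id of each node and the DAG of components (the DAGSCC).
    index, low, on_stack, stack = {}, {}, set(), []
    comp, counter = {}, itertools.count()

    def strongconnect(v):
        index[v] = low[v] = len(index)
        stack.append(v)
        on_stack.add(v)
        for w in graph[v]:
            if w not in index:
                strongconnect(w)
                low[v] = min(low[v], low[w])
            elif w in on_stack:
                low[v] = min(low[v], index[w])
        if low[v] == index[v]:          # v is the root of an SCC
            c = next(counter)
            while True:
                w = stack.pop()
                on_stack.discard(w)
                comp[w] = c
                if w == v:
                    break

    for v in graph:
        if v not in index:
            strongconnect(v)
    dag = {c: set() for c in set(comp.values())}
    for v, succs in graph.items():
        for w in succs:
            if comp[v] != comp[w]:
                dag[comp[v]].add(comp[w])
    return comp, dag

# "a" and "b" form a dependence cycle; "c" hangs off the cycle.
comp, dag = condense({"a": {"b"}, "b": {"a", "c"}, "c": set()})
assert comp["a"] == comp["b"] != comp["c"]
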
@Article{ElWazeer:2013:SVD,
author = "Khaled ElWazeer and Kapil Anand and Aparna Kotha and
Matthew Smithson and Rajeev Barua",
title = "Scalable variable and data type detection in a binary
rewriter",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "51--60",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462165",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present scalable static analyses to recover
variables, data types, and function prototypes from
stripped x86 executables (without symbol or debug
information) and obtain a functional intermediate
representation (IR) for analysis and rewriting
purposes. Our techniques on average run $ 352 \times $
faster than current techniques and still have the same
precision. This enables analyzing executables as large
as millions of instructions in minutes, which is not
possible using existing techniques. Our techniques can
recover variables allocated to the floating point
stack, unlike current techniques. We have integrated
our techniques to obtain a compiler-level IR that works
correctly if recompiled and produces the same output as
the input executable. We demonstrate the scalability,
precision, and correctness of our proposed techniques by
evaluating them on the complete SPEC2006 benchmark
suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Rajaram:2013:FRT,
author = "Bharghava Rajaram and Vijay Nagarajan and Susmit
Sarkar and Marco Elver",
title = "Fast {RMWs} for {TSO}: semantics and implementation",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "61--72",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462196",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Read-Modify-Write (RMW) instructions are widely used
as the building blocks of a variety of higher level
synchronization constructs, including locks, barriers,
and lock-free data structures. Unfortunately, they are
expensive in architectures such as x86 and SPARC which
enforce (variants of) Total-Store-Order (TSO). A key
reason is that RMWs in these architectures are ordered
like a memory barrier, incurring the cost of a
write-buffer drain in the critical path. Such strong
ordering semantics are dictated by the requirements of
the strict atomicity definition (type-1) that existing
TSO RMWs use. Programmers often do not need such strong
semantics. Besides, weakening the atomicity definition
of TSO RMWs would also weaken their ordering ---
thereby leading to more efficient hardware
implementations. In this paper we argue for TSO RMWs to
use weaker atomicity definitions --- we consider two
weaker definitions: type-2 and type-3, each with a
different degree of ordering relaxation. We formally
specify how
such weaker RMWs would be ordered, and show that type-2
RMWs, in particular, can seamlessly replace existing
type-1 RMWs in common synchronization idioms --- except
in situations where a type-1 RMW is used as a memory
barrier. Recent work has shown that the new C/C++11
concurrency model can be realized by generating
conventional (type-1) RMWs for C/C++11 SC-atomic-writes
and/or SC-atomic-reads. We formally prove that this is
equally valid using the proposed type-2 RMWs; type-3
RMWs, on the other hand, could be used for
SC-atomic-reads (and optionally SC-atomic-writes). We
further propose efficient microarchitectural
implementations for type-2 (type-3) RMWs --- simulation
results show that our implementation reduces the cost
of an RMW by up to 58.9\% (64.3\%), which translates
into an overall performance improvement of up to 9.0\%
(9.2\%) on a set of parallel programs, including those
from the SPLASH-2, PARSEC, and STAMP benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Gordon:2013:RGR,
author = "Colin S. Gordon and Michael D. Ernst and Dan
Grossman",
title = "Rely-guarantee references for refinement types over
aliased mutable data",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "73--84",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462160",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reasoning about side effects and aliasing is the heart
of verifying imperative programs. Unrestricted side
effects through one reference can invalidate
assumptions about an alias. We present a new type
system approach to reasoning about safe assumptions in
the presence of aliasing and side effects, unifying
ideas from reference immutability type systems and
rely-guarantee program logics. Our approach,
rely-guarantee references, treats multiple references
to shared objects similarly to multiple threads in
rely-guarantee program logics. We propose statically
associating rely and guarantee conditions with
individual references to shared objects. Multiple
aliases to a given object may coexist only if the
guarantee condition of each alias implies the rely
condition for all other aliases. We demonstrate that
existing reference immutability type systems are
special cases of rely-guarantee references. In addition
to allowing precise control over state modification,
rely-guarantee references allow types to depend on
mutable data while still permitting flexible aliasing.
Dependent types whose denotation is stable over the
actions of the rely and guarantee conditions for a
reference and its data will not be invalidated by any
action through any alias. We demonstrate this with
refinement (subset) types that may depend on mutable
data. As a special case, we derive the first reference
immutability type system with dependent types over
immutable data. We show soundness for our approach and
describe experience using rely-guarantee references in
a dependently-typed monadic DSL in Coq.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
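
%%% The entry above admits multiple aliases only when each alias's
%%% guarantee implies the rely of every other alias. The
%%% deliberately toy Python sketch below models rely and guarantee
%%% conditions as plain sets of permitted action names, so
%%% "implies" degenerates to set containment; the reference kinds
%%% and actions are invented, and this is only one possible reading
%%% of the side condition.

def compatible(refs):
    # refs: list of (rely, guarantee) pairs, each a set of action
    # names. Coexistence requires every guarantee to be contained
    # in (i.e. to imply) every other reference's rely.
    return all(g_i <= r_j
               for i, (_, g_i) in enumerate(refs)
               for j, (r_j, _) in enumerate(refs) if i != j)

readonly = ({"inc"}, set())    # tolerates increments, writes nothing
counter  = ({"inc"}, {"inc"})  # tolerates and performs increments
resetter = (set(), {"reset"})  # tolerates nothing, performs resets

assert compatible([readonly, counter])
assert not compatible([counter, resetter])
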
@Article{Titzer:2013:HCF,
author = "Ben L. Titzer",
title = "Harmonizing classes, functions, tuples, and type
parameters in {Virgil III}",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "85--94",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2491962",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Languages are becoming increasingly multi-paradigm.
Subtype polymorphism in statically-typed
object-oriented languages is being supplemented with
parametric polymorphism in the form of generics.
Features like first-class functions and lambdas are
appearing everywhere. Yet existing languages like Java,
C\#, C++, D, and Scala seem to accrete ever more
complexity when they reach beyond their original
paradigm into another; inevitably older features have
some rough edges that lead to nonuniformity and
pitfalls. Given a fresh start, a new language designer
is faced with a daunting array of potential features.
Where to start? What is important to get right first,
and what can be added later? What features must work
together, and what features are orthogonal? We report
on our experience with Virgil III, a practical language
with a careful balance of classes, functions, tuples
and type parameters. Virgil intentionally lacks many
advanced features, yet we find its core feature set
enables new species of design patterns that bridge
multiple paradigms and emulate features not directly
supported, such as interfaces, abstract data types, ad
hoc polymorphism, and variant types. Surprisingly, we
find variance for function types and tuple types often
replaces the need for other kinds of type variance when
libraries are designed in a more functional style.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Burckhardt:2013:ACF,
author = "Sebastian Burckhardt and Manuel Fahndrich and Peli de
Halleux and Sean McDirmid and Michal Moskal and Nikolai
Tillmann and Jun Kato",
title = "{It}'s alive! {Continuous} feedback in {UI}
programming",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "95--104",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462170",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Live programming allows programmers to edit the code
of a running program and immediately see the effect of
the code changes. This tightening of the traditional
edit-compile-run cycle reduces the cognitive gap
between program code and execution, improving the
learning experience of beginning programmers while
boosting the productivity of seasoned ones.
Unfortunately, live programming is difficult to realize
in practice as imperative languages lack well-defined
abstraction boundaries that make live programming
responsive or its feedback comprehensible. This paper
enables live programming for user interface programming
by cleanly separating the rendering and non-rendering
aspects of a UI program, allowing the display to be
refreshed on a code change without restarting the
program. A type and effect system formalizes this
separation and provides an evaluation model that
incorporates the code update step. By putting live
programming on a more formal footing, we hope to enable
critical and technical discussion of live programming
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{DeVito:2013:TMS,
author = "Zachary DeVito and James Hegarty and Alex Aiken and
Pat Hanrahan and Jan Vitek",
title = "{Terra}: a multi-stage language for high-performance
computing",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "105--116",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462166",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "High-performance computing applications, such as
auto-tuners and domain-specific languages, rely on
generative programming techniques to achieve high
performance and portability. However, these systems are
often implemented in multiple disparate languages and
perform code generation in a separate process from
program execution, making certain optimizations
difficult to engineer. We leverage a popular scripting
language, Lua, to stage the execution of a novel
low-level language, Terra. Users can implement
optimizations in the high-level language, and use
built-in constructs to generate and execute
high-performance Terra code. To simplify
meta-programming, Lua and Terra share the same lexical
environment, but, to ensure performance, Terra code can
execute independently of Lua's runtime. We evaluate our
design by reimplementing existing multi-language
systems entirely in Terra. Our Terra-based auto-tuner
for BLAS routines performs within 20\% of ATLAS, and
our DSL for stencil computations runs 2.3x faster than
hand-written C.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Li:2013:SIA,
author = "Jiajia Li and Guangming Tan and Mingyu Chen and
Ninghui Sun",
title = "{SMAT}: an input adaptive auto-tuner for sparse
matrix-vector multiplication",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "117--126",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462181",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Sparse Matrix Vector multiplication (SpMV) is an
important kernel in both traditional high performance
computing and emerging data-intensive applications. By
far, SpMV libraries are optimized by either
application-specific or architecture-specific
approaches, making the libraries too complicated
to be used extensively in real applications. In this
work we develop a Sparse Matrix-vector multiplication
Auto-Tuning system (SMAT) to bridge the gap between
specific optimizations and general-purpose usage. SMAT
provides users with a unified programming interface in
compressed sparse row (CSR) format and automatically
determines the optimal format and implementation for
any input sparse matrix at runtime. For this purpose,
SMAT leverages a learning model, which is generated in
an off-line stage by a machine learning method with a
training set of more than 2000 matrices from the UF
sparse matrix collection, to quickly predict the best
combination of the matrix feature parameters. Our
experiments show that SMAT achieves impressive
performance of up to 51 GFLOPS in single precision and
37 GFLOPS in double precision on mainstream x86
multi-core processors, both more than 3 times faster
than the Intel MKL library. We also demonstrate its
adaptability in an algebraic multigrid solver from the
Hypre library, with a reported performance improvement
of more than 20\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
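
%%% The SMAT entry above exposes a single CSR interface and picks a
%%% specialized format at run time. For concreteness, here is a
%%% minimal Python/NumPy sketch of the CSR sparse matrix--vector
%%% product behind that unified interface; the 2x3 example matrix is
%%% invented, and a tuned library would replace the interpreted
%%% loops.

import numpy as np

def spmv_csr(data, indices, indptr, x):
    # y = A @ x for A stored in compressed sparse row (CSR) form:
    # row i owns the nonzeros data[indptr[i]:indptr[i+1]], whose
    # column positions are indices[indptr[i]:indptr[i+1]].
    n = len(indptr) - 1
    y = np.zeros(n)
    for i in range(n):
        for k in range(indptr[i], indptr[i + 1]):
            y[i] += data[k] * x[indices[k]]
    return y

# The 2x3 matrix [[1, 0, 2], [0, 3, 0]] in CSR form.
data    = np.array([1.0, 2.0, 3.0])
indices = np.array([0, 2, 1])
indptr  = np.array([0, 2, 3])
x = np.array([1.0, 1.0, 1.0])
assert np.allclose(spmv_csr(data, indices, indptr, x), [3.0, 3.0])
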
@Article{Kong:2013:WPT,
author = "Martin Kong and Richard Veras and Kevin Stock and
Franz Franchetti and Louis-No{\"e}l Pouchet and P.
Sadayappan",
title = "When polyhedral transformations meet {SIMD} code
generation",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "127--138",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462187",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data locality and parallelism are critical
optimization objectives for performance on modern
multi-core machines. Both coarse-grain parallelism
(e.g., multi-core) and fine-grain parallelism (e.g.,
vector SIMD) must be effectively exploited, but despite
decades of progress at both ends, current compiler
optimization schemes that attempt to address data
locality and both kinds of parallelism often fail at
one of the three objectives. We address this problem by
proposing a 3-step framework, which aims for integrated
data locality, multi-core parallelism and SIMD
execution of programs. We define the concept of
vectorizable codelets, with properties tailored to
achieve effective SIMD code generation for the
codelets. We leverage the power of a modern high-level
transformation framework to restructure a program to
expose good ISA-independent vectorizable codelets,
exploiting multi-dimensional data reuse. Then, we
generate ISA-specific customized code for the codelets,
using a collection of lower-level SIMD-focused
optimizations. We demonstrate our approach on a
collection of numerical kernels that we automatically
tile, parallelize and vectorize, exhibiting significant
performance improvements over existing compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Schneider:2013:PLS,
author = "Fred B. Schneider",
title = "Programming languages in security: keynote",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "139--140",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462158",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Huang:2013:CRL,
author = "Jeff Huang and Charles Zhang and Julian Dolby",
title = "{CLAP}: recording local executions to reproduce
concurrency failures",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "141--152",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462167",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present CLAP, a new technique to reproduce
concurrency bugs. CLAP has two key steps. First, it
logs thread local execution paths at runtime. Second,
offline, it computes memory dependencies that accord
with the logged execution and are able to reproduce the
observed bug. The second step works by combining
constraints from the thread paths and constraints based
on a memory model, and computing an execution with a
constraint solver. CLAP has four major advantages.
First, logging purely local execution of each thread is
substantially cheaper than logging memory interactions,
which enables CLAP to be efficient compared to previous
approaches. Second, our logging does not require any
synchronization and hence adds no memory barriers or
fences; this minimizes perturbation and avoids missing
bugs because no extra synchronization forecloses racy
behaviors. Third, since it uses no synchronization, we
extend CLAP to work on a range of relaxed memory
models, such as TSO and PSO, in addition to sequential
consistency. Fourth, CLAP can compute a much simpler
execution than the original one that reveals the bug
with minimal thread context switches. To mitigate the
scalability issues, we also present an approach to
parallelize constraint solving, which theoretically
scales our technique to programs with arbitrary
execution length. Experimental results on a variety of
multithreaded benchmarks and real world concurrent
applications validate these advantages by showing that
our technique is effective in reproducing concurrency
bugs even under relaxed memory models; furthermore, it
is significantly more efficient than a state-of-the-art
technique that records shared memory dependencies,
reducing execution time overhead by 45\% and log size
by 88\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Elmas:2013:CDS,
author = "Tayfun Elmas and Jacob Burnim and George Necula and
Koushik Sen",
title = "{CONCURRIT}: a domain specific language for
reproducing concurrency bugs",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "153--164",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462162",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present CONCURRIT, a domain-specific language (DSL)
for reproducing concurrency bugs. Given some partial
information about the nature of a bug in an
application, a programmer can write a CONCURRIT script
to formally and concisely specify a set of thread
schedules to explore in order to find a schedule
exhibiting the bug. Further, the programmer can specify
how these thread schedules should be searched to find a
schedule that reproduces the bug. We implemented
CONCURRIT as an embedded DSL in C++, which uses manual
or automatic source instrumentation to partially
control the scheduling of the software under test.
Using CONCURRIT, we were able to write concise tests to
reproduce concurrency bugs in a variety of benchmarks,
including Mozilla's SpiderMonkey JavaScript engine,
Memcached, Apache's HTTP server, and MySQL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Schaefer:2013:DDA,
author = "Max Sch{\"a}fer and Manu Sridharan and Julian Dolby
and Frank Tip",
title = "Dynamic determinacy analysis",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "165--174",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462168",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an analysis for identifying determinate
variables and expressions that always have the same
value at a given program point. This information can be
exploited by client analyses and tools to, e.g.,
identify dead code or specialize uses of dynamic
language constructs such as eval, replacing them with
equivalent static constructs. Our analysis is
completely dynamic and only needs to observe a single
execution of the program, yet the determinacy facts it
infers hold for any execution. We present a formal
soundness proof of the analysis for a simple imperative
language, and a prototype implementation that handles
full JavaScript. Finally, we report on two case studies
that explored how static analysis for JavaScript could
leverage the information gathered by dynamic
determinacy analysis. We found that in some cases
scalability of static pointer analysis was improved
dramatically, and that many uses of runtime code
generation could be eliminated.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Zhao:2013:FVS,
author = "Jianzhou Zhao and Santosh Nagarakatte and Milo M. K.
Martin and Steve Zdancewic",
title = "Formal verification of {SSA}-based optimizations for
{LLVM}",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "175--186",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462164",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern compilers, such as LLVM and GCC, use a static
single assignment (SSA) intermediate representation
(IR) to simplify and enable many advanced
optimizations. However, formally verifying the
correctness of SSA-based optimizations is challenging
because SSA properties depend on a function's entire
control-flow graph. This paper addresses this challenge
by developing a proof technique for proving SSA-based
program invariants and compiler optimizations. We use
this technique in the Coq proof assistant to create
mechanized correctness proofs of several ``micro''
transformations that form the building blocks for
larger SSA optimizations. To demonstrate the utility of
this approach, we formally verify a variant of LLVM's
mem2reg transformation in Vellvm, a Coq-based formal
semantics of the LLVM IR. The extracted implementation
generates code with performance comparable to that of
LLVM's unverified implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Morisset:2013:CTT,
author = "Robin Morisset and Pankaj Pawan and Francesco Zappa
Nardelli",
title = "Compiler testing via a theory of sound optimisations
in the {C11\slash C++11} memory model",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "187--196",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2491967",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compilers sometimes generate correct sequential code
but break the concurrency memory model of the
programming language: these subtle compiler bugs are
observable only when the miscompiled functions interact
with concurrent contexts, making them particularly hard
to detect. In this work we design a strategy to reduce
the hard problem of hunting concurrency compiler bugs
to differential testing of sequential code and build a
tool that puts this strategy to work. Our first
contribution is a theory of sound optimisations in the
C11/C++11 memory model, covering most of the
optimisations we have observed in real compilers and
validating the claim that common compiler optimisations
are sound in the C11/C++11 memory model. Our second
contribution is to show how, building on this theory,
concurrency compiler bugs can be identified by
comparing the memory trace of compiled code against a
reference memory trace for the source code. Our tool
identified several mistaken write introductions and
other unexpected behaviours in the latest release of
the gcc compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Chen:2013:TCF,
author = "Yang Chen and Alex Groce and Chaoqiang Zhang and
Weng-Keen Wong and Xiaoli Fern and Eric Eide and John
Regehr",
title = "Taming compiler fuzzers",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "197--208",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462173",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Aggressive random testing tools (``fuzzers'') are
impressively effective at finding compiler bugs. For
example, a single test-case generator has resulted in
more than 1,700 bugs reported for a single JavaScript
engine. However, fuzzers can be frustrating to use:
they indiscriminately and repeatedly find bugs that may
not be severe enough to fix right away. Currently,
users filter out undesirable test cases using ad hoc
methods such as disallowing problematic features in
tests and grepping test results. This paper formulates
and addresses the fuzzer taming problem: given a
potentially large number of random test cases that
trigger failures, order them such that diverse,
interesting test cases are highly ranked. Our
evaluation shows our ability to solve the fuzzer taming
problem for 3,799 test cases triggering 46 bugs in a C
compiler and 2,603 test cases triggering 28 bugs in a
JavaScript engine.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
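
%%% The fuzzer-taming entry above asks for an ordering of failing
%%% test cases in which diverse, interesting cases appear early. One
%%% plausible baseline (a sketch, not the paper's tool) is a
%%% furthest-point-first ordering under some distance between test
%%% cases; the length-difference metric below is only a stand-in for
%%% a real one such as edit distance on reduced tests.

def rank_furthest_point_first(tests, distance):
    # Greedily pick the test case farthest from everything already
    # ranked, so near-duplicate failures sink to the bottom.
    if not tests:
        return []
    ranked = [tests[0]]
    remaining = list(tests[1:])
    while remaining:
        best = max(remaining,
                   key=lambda t: min(distance(t, r) for r in ranked))
        ranked.append(best)
        remaining.remove(best)
    return ranked

order = rank_furthest_point_first(
    ["aaaa", "ab", "aaaaaaaa", "a"],
    lambda s, t: abs(len(s) - len(t)))
# -> ['aaaa', 'aaaaaaaa', 'a', 'ab']
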
@Article{Blackshear:2013:ACS,
author = "Sam Blackshear and Shuvendu K. Lahiri",
title = "Almost-correct specifications: a modular semantic
framework for assigning confidence to warnings",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "209--218",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462188",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modular assertion checkers are plagued with false
alarms due to the need for precise environment
specifications (preconditions and callee
postconditions). Even the fully precise checkers report
assertion failures under the most demonic environments
allowed by unconstrained or partial specifications. The
inability to preclude overly adversarial environments
makes such checkers less attractive to developers and
severely limits the adoption of such tools in the
development cycle. In this work, we propose a
parameterized framework for prioritizing the assertion
failures reported by a modular verifier, with the goal
of suppressing warnings from overly demonic
environments. We formalize almost-correct
specifications as the minimal weakening of an angelic
specification (over a set of predicates) that precludes
any dead code intraprocedurally. Our work is inspired
by and generalizes some aspects of semantic
inconsistency detection. Our formulation allows us to
lift this idea to a general class of warnings. We have
developed a prototype {\tt acspec}, which we use to
explore a few instantiations of the framework and
report preliminary findings on a diverse set of C
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Cook:2013:RAN,
author = "Byron Cook and Eric Koskinen",
title = "Reasoning about nondeterminism in programs",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "219--230",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2491969",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Branching-time temporal logics (e.g. CTL, CTL*, modal
mu-calculus) allow us to ask sophisticated questions
about the nondeterminism that appears in systems.
Applications of this type of reasoning include
planning, games, security analysis, disproving,
precondition synthesis, environment synthesis, etc.
Unfortunately, existing automatic branching-time
verification tools have limitations that have
traditionally restricted their applicability (e.g.
push-down systems only, universal path quantifiers
only, etc). In this paper we introduce an automation
strategy that lifts many of these previous
restrictions. Our method works reliably for properties
with non-trivial mixtures of universal and existential
modal operators. Furthermore, our approach is designed
to support (possibly infinite-state) programs. The
basis of our approach is the observation that
existential reasoning can be reduced to universal
reasoning if the system's state-space is appropriately
restricted. This restriction on the state-space must
meet a constraint derived from recent work on proving
non-termination. The observation leads to a new route
for implementation based on existing tools. To
demonstrate the practical viability of our approach, we
report on the results applying our preliminary
implementation to a set of benchmarks drawn from the
Windows operating system, the PostgreSQL database
server, the SoftUpdates patching system, as well as other
hand-crafted examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Qiu:2013:NPS,
author = "Xiaokang Qiu and Pranav Garg and Andrei Stefanescu and
Parthasarathy Madhusudan",
title = "Natural proofs for structure, data, and separation",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "231--242",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462169",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose natural proofs for reasoning with programs
that manipulate data-structures against specifications
that describe the structure of the heap, the data
stored within it, and separation and framing of
sub-structures. Natural proofs are a subclass of proofs
that are amenable to completely automated reasoning,
that provide sound but incomplete procedures, and that
capture common reasoning tactics in program
verification. We develop a dialect of separation logic
over heaps, called Dryad, with recursive definitions
that avoids explicit quantification. We develop ways to
reason with heaplets using classical logic over the
theory of sets, and develop natural proofs for
reasoning using proof tactics involving disciplined
unfoldings and formula abstractions. Natural proofs are
encoded into decidable theories of first-order logic so
as to be discharged using SMT solvers. We also
implement the technique and show that a large class of
more than 100 correct programs that manipulate
data-structures are amenable to full functional
correctness using the proposed natural proof method.
These programs are drawn from a variety of sources
including standard data-structures, the Schorr--Waite
algorithm for garbage collection, a large number of
low-level C routines from the Glib library and OpenBSD
library, the Linux kernel, and routines from a secure
verified OS-browser project. Our work is the first that
we know of that can handle such a wide range of full
functional verification properties of heaps
automatically, given pre/post and loop invariant
annotations. We believe that this work paves the way
for deductive verification technology to be used by
programmers who do not (and need not) understand the
internals of the underlying logic solvers,
significantly increasing their applicability in
building reliable systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Yu:2013:GDS,
author = "Hongtao Yu and Hou-Jen Ko and Zhiyuan Li",
title = "General data structure expansion for multi-threading",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "243--252",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462182",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Among techniques for parallelizing sequential codes,
privatization is a common and significant
transformation performed by both compilers and runtime
parallelizing systems. Without privatization,
repetitive updates to the same data structures often
introduce spurious data dependencies that hide the
inherent parallelism. Unfortunately, it remains a
significant challenge to compilers to automatically
privatize dynamic and recursive data structures which
appear frequently in real applications written in
languages such as C/C++. This is because such languages
lack a naming mechanism to define the address range of
a pointer-based data structure, in contrast to arrays
with explicitly declared bounds. In this paper we
present a novel solution to this difficult problem by
expanding general data structures such that memory
accesses issued from different threads to contentious
data structures are directed to different data fields.
Based on compile-time type checking and a data
dependence graph, this aggressive extension to the
traditional scalar and array expansion isolates the
address ranges among different threads, without
struggling with privatization based on thread-private
stacks, such that the targeted loop can be effectively
parallelized. With this method fully implemented in
GCC, experiments are conducted on a set of programs
from well-known benchmark suites such as MiBench,
MediaBench II and SPECint. Results show that the new
approach can lead to a high speedup when executing the
transformed code on multiple cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Hung:2013:AAS,
author = "Wei-Lun Hung and Vijay K. Garg",
title = "{AutoSynch}: an automatic-signal monitor based on
predicate tagging",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "253--262",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462175",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "{Most programming languages use monitors with explicit
signals for synchronization in shared-memory programs.
Requiring programmers to signal threads explicitly
results in many concurrency bugs due to missed
notifications, or notifications on wrong condition
variables. In this paper, we describe an implementation
of an automatic signaling monitor in Java called
AutoSynch that eliminates such concurrency bugs by
removing the burden of signaling from the programmer.
We show that the belief that automatic signaling
monitors are prohibitively expensive is wrong. For most
problems, programs based on AutoSynch are almost as
fast as those based on explicit signaling. For some,
AutoSynch is even faster than explicit signaling
because it never uses signalAll, whereas the
programmers end up using signalAll with the explicit
signal mechanism. AutoSynch} achieves efficiency in
synchronization based on three novel ideas. First, we
introduce an operation called closure that enables the
predicate evaluation in every thread, thereby reducing
context switches during the execution of the program.
Secondly, AutoSynch avoids signalAll by using a
property called relay invariance, which guarantees that,
whenever possible, at least one thread whose condition
is true has been signaled.
Finally, AutoSynch uses a technique called predicate
tagging to efficiently determine a thread that should
be signaled. To evaluate the efficiency of AutoSynch,
we have implemented many different well-known
synchronization problems such as the
producers/consumers problem, the readers/writers
problems, and the dining philosophers problem. The
results show that AutoSynch is almost as efficient as
the explicit-signal monitor and even more efficient for
some cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
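
%%% The AutoSynch entry above removes explicit signaling from
%%% monitors. The minimal Python sketch below shows the programming
%%% model only: threads block on arbitrary predicates, and every
%%% state change wakes all waiters to re-check their predicates.
%%% AutoSynch's closure and predicate-tagging techniques exist
%%% precisely to avoid this broadcast; the class is illustrative,
%%% not the paper's implementation.

import threading

class AutoMonitor:
    def __init__(self):
        self._changed = threading.Condition()

    def await_and_do(self, predicate, action):
        # Automatic-signal style: the caller states *what* must hold,
        # never which condition variable to signal. Block until the
        # predicate holds, run the action under the monitor lock,
        # then wake all waiters to re-evaluate their own predicates.
        with self._changed:
            while not predicate():
                self._changed.wait()
            action()
            self._changed.notify_all()

# Bounded-buffer usage sketch (buf and capacity defined elsewhere):
#   monitor.await_and_do(lambda: len(buf) < capacity,
#                        lambda: buf.append(item))
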
@Article{Golan-Gueta:2013:CLF,
author = "Guy Golan-Gueta and G. Ramalingam and Mooly Sagiv and
Eran Yahav",
title = "Concurrent libraries with foresight",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "263--274",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462172",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Linearizable libraries provide operations that appear
to execute atomically. Clients, however, may need to
execute a sequence of operations (a composite
operation) atomically. We consider the problem of
extending a linearizable library to support arbitrary
atomic composite operations by clients. We introduce a
novel approach in which the concurrent library ensures
atomicity of composite operations by exploiting
information (foresight) provided by its clients. We use
a correctness condition, based on a notion of dynamic
right-movers, that guarantees that composite operations
execute atomically without deadlocks, and without using
rollbacks. We present a static analysis to infer the
foresight information required by our approach,
allowing a compiler to automatically insert the
foresight information into the client. This relieves
the client programmer of this burden and simplifies
writing client code. We present a generic technique for
extending the library implementation to realize
foresight-based synchronization. This technique is used
to implement a general-purpose Java library for Map
data structures --- the library permits composite
operations to simultaneously work with multiple
instances of Map data structures. We use the Maps
library and the static analysis to enforce atomicity of
a wide selection of real-life Java composite
operations. Our experiments indicate that our approach
enables realizing efficient and scalable
synchronization for real-life composite operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Blackshear:2013:TPR,
author = "Sam Blackshear and Bor-Yuh Evan Chang and Manu
Sridharan",
title = "{Thresher}: precise refutations for heap
reachability",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "275--286",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462186",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a precise, path-sensitive static analysis
for reasoning about heap reachability, that is, whether
an object can be reached from another variable or
object via pointer dereferences. Precise reachability
information is useful for a number of clients,
including static detection of a class of Android memory
leaks. For this client, we found the heap reachability
information computed by a state-of-the-art points-to
analysis was too imprecise, leading to numerous
false-positive leak reports. Our analysis combines a
symbolic execution capable of path-sensitivity and
strong updates with abstract heap information computed
by an initial flow-insensitive points-to analysis. This
novel mixed representation allows us to achieve both
precision and scalability by leveraging the
pre-computed points-to facts to guide execution and
prune infeasible paths. We have evaluated our
techniques in the Thresher tool, which we used to find
several developer-confirmed leaks in Android
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Udupa:2013:TSP,
author = "Abhishek Udupa and Arun Raghavan and Jyotirmoy V.
Deshmukh and Sela Mador-Haim and Milo M. K. Martin and
Rajeev Alur",
title = "{TRANSIT}: specifying protocols with concolic
snippets",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "287--296",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462174",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the maturing of technology for model checking and
constraint solving, there is an emerging opportunity to
develop programming tools that can transform the way
systems are specified. In this paper, we propose a new
way to program distributed protocols using concolic
snippets. Concolic snippets are sample execution
fragments that contain both concrete and symbolic
values. The proposed approach allows the programmer to
describe the desired system partially using the
traditional model of communicating extended
finite-state-machines (EFSM), along with high-level
invariants and concrete execution fragments. Our
synthesis engine completes an EFSM skeleton by
inferring guards and updates from the given fragments
which is then automatically analyzed using a model
checker with respect to the desired invariants. The
counterexamples produced by the model checker can then
be used by the programmer to add new concrete execution
fragments that describe the correct behavior in the
specific scenario corresponding to the counterexample.
We describe TRANSIT, a language and prototype
implementation of the proposed specification
methodology for distributed protocols. Experimental
evaluations of TRANSIT to specify cache coherence
protocols show that (1) the algorithm for expression
inference from concolic snippets can synthesize
expressions of size 15 involving typical operators over
commonly occurring types, (2) for a classical
directory-based protocol, TRANSIT automatically
generates, in a few seconds, a complete implementation
from a specification consisting of the EFSM structure
and a few concrete examples for every transition, and
(3) a published partial description of the SGI Origin
cache coherence protocol maps directly to symbolic
examples and leads to a complete implementation in a
few iterations, with the programmer correcting
counterexamples resulting from underspecified
transitions by adding concrete examples in each
iteration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Gao:2013:UMR,
author = "Tiejun Gao and Karin Strauss and Stephen M. Blackburn
and Kathryn S. McKinley and Doug Burger and James
Larus",
title = "Using managed runtime systems to tolerate holes in
wearable memories",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "297--308",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462171",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "{New memory technologies, such as phase-change memory
(PCM), promise denser and cheaper main memory, and are
expected to displace DRAM. However, many of them
experience permanent failures far more quickly than
DRAM. DRAM mechanisms that handle permanent failures
rely on very low failure rates and, if directly applied
to PCM, are extremely inefficient: Discarding a page
when the first line fails wastes 98\% of the memory.
This paper proposes low complexity cooperative software
and hardware that handle failure rates as high as 50\%.
Our approach makes error handling transparent to the
application by using the memory abstraction offered by
managed languages. Once hardware error correction for a
memory line is exhausted, rather than discarding the
entire page, the hardware communicates the failed line
to a failure-aware OS and runtime. The runtime ensures
memory allocations never use failed lines and moves
data when lines fail during program execution. This
paper describes minimal extensions to an Immix
mark-region garbage collector, which correctly utilizes
pages with failed physical lines by skipping over
failures. This paper also proposes hardware support
that clusters failed lines at one end of a memory
region to reduce fragmentation and improve performance
under failures. Contrary to accepted hardware wisdom
that advocates for wear-leveling, we show that with
software support non-uniform failures delay the impact
of memory failure. Together, these mechanisms incur no
performance overhead when there are no failures and at
failure levels of 10\% to 50\% suffer only an average
overhead of 4\% and 12\%}, respectively. These results
indicate that hardware and software cooperation can
greatly extend the life of wearable memories.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Cohen:2013:LPC,
author = "Nachshon Cohen and Erez Petrank",
title = "Limitations of partial compaction: towards practical
bounds",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "309--320",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2491973",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compaction of a managed heap is considered a costly
operation, and is avoided as much as possible in
commercial runtimes. Instead, partial compaction is
often used to defragment parts of the heap and avoid
space blowup. Previous studies of compaction limitations
provided some initial asymptotic bounds but no
implications for practical systems. In this work, we
extend the theory to obtain better bounds and make them
strong enough to become meaningful for modern
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Desai:2013:PSA,
author = "Ankush Desai and Vivek Gupta and Ethan Jackson and
Shaz Qadeer and Sriram Rajamani and Damien Zufferey",
title = "{P}: safe asynchronous event-driven programming",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "321--332",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462184",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe the design and implementation of P, a
domain-specific language to write asynchronous event
driven code. P allows the programmer to specify the
system as a collection of interacting state machines,
which communicate with each other using events. P
unifies modeling and programming into one activity for
the programmer. Not only can a P program be compiled
into executable code, but it can also be tested using
model checking techniques. P allows the programmer to
specify the environment, used to ``close'' the system
during testing, as nondeterministic ghost machines.
Ghost machines are erased during compilation to
executable code; a type system ensures that the erasure
is semantics preserving. The P language is designed so
that a P program can be checked for
responsiveness---the ability to handle every event in a
timely manner. By default, a machine needs to handle
every event that arrives in every state. But handling
every event in every state is impractical. The language
provides a notion of deferred events where the
programmer can annotate when she wants to delay
processing an event. The default safety checker looks
for the presence of unhandled events. The language also
provides default liveness checks ensuring that an event
cannot be deferred forever. P was used to
implement and verify the core of the USB device driver
stack that ships with Microsoft Windows 8. The
resulting driver is more reliable and performs better
than its prior incarnation (which did not use P); we
have more confidence in the robustness of its design
due to the language abstractions and verification
provided by P.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
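
The deferred-event discipline can be emulated compactly. The Python below is a
hypothetical rendering of the idea, not P syntax: P declares machines, states,
and events, and its checker verifies the unhandled-event safety property
statically, whereas this toy raises the error at run time.

    from collections import deque

    # Toy emulation of a P-style state machine with deferred events.
    class Machine:
        def __init__(self):
            self.state = "Init"
            self.queue = deque()
            # Events absent from a state's handler map and defer set
            # are "unhandled" -- the safety error P's checker looks for.
            self.handlers = {
                "Init": {"start": lambda: self.goto("Run")},
                "Run":  {"data":  lambda: print("processing data"),
                         "stop":  lambda: self.goto("Init")},
            }
            self.deferred = {"Init": {"data"}}   # delay 'data' until Run

        def goto(self, s):
            self.state = s

        def send(self, ev):
            self.queue.append(ev)

        def step(self):
            for _ in range(len(self.queue)):
                ev = self.queue.popleft()
                if ev in self.deferred.get(self.state, ()):
                    self.queue.append(ev)        # defer: keep for later
                elif ev in self.handlers[self.state]:
                    self.handlers[self.state][ev]()
                else:
                    raise RuntimeError(f"unhandled {ev} in {self.state}")

    m = Machine()
    m.send("data"); m.send("start")
    m.step()   # 'data' is deferred in Init; 'start' moves us to Run
    m.step()   # the deferred 'data' is now handled
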
@Article{Green:2013:QSQ,
author = "Alexander S. Green and Peter LeFanu Lumsdaine and Neil
J. Ross and Peter Selinger and Beno{\^\i}t Valiron",
title = "{Quipper}: a scalable quantum programming language",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "333--342",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462177",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The field of quantum algorithms is vibrant. Still,
there is currently a lack of programming languages for
describing quantum computation on a practical scale,
i.e., not just at the level of toy problems. We address
this issue by introducing Quipper, a scalable,
expressive, functional, higher-order quantum
programming language. Quipper has been used to program
a diverse set of non-trivial quantum algorithms, and
can generate quantum gate representations using
trillions of gates. It is geared towards a model of
computation that uses a classical computer to control a
quantum device, but is not dependent on any particular
model of quantum hardware. Quipper has proven effective
and easy to use, and opens the door towards using
formal methods to analyze quantum algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Isradisaikul:2013:REP,
author = "Chinawat Isradisaikul and Andrew C. Myers",
title = "Reconciling exhaustive pattern matching with objects",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "343--354",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462194",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Pattern matching, an important feature of functional
languages, is in conflict with data abstraction and
extensibility, which are central to object-oriented
languages. Modal abstraction offers an integration of
deep pattern matching and convenient iteration
abstractions into an object-oriented setting; however,
because of data abstraction, it is challenging for a
compiler to statically verify properties such as
exhaustiveness. In this work, we extend modal
abstraction in the JMatch language to support static,
modular reasoning about exhaustiveness and redundancy.
New matching specifications allow these properties to
be checked using an SMT solver. We also introduce
expressive pattern-matching constructs. Our evaluation
shows that these new features enable more concise code
and that the performance of checking exhaustiveness and
redundancy is acceptable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
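
The exhaustiveness and redundancy checks reduce naturally to satisfiability
queries. A minimal sketch using the Z3 Python bindings (Z3 is an assumed
stand-in for whichever solver JMatch uses), with match arms modelled as
integer predicates:

    # Sketch: exhaustiveness and redundancy of integer match arms via
    # SMT (assumes the z3-solver package; arm predicates are made up).
    from z3 import And, Int, Not, Or, Solver, sat

    x = Int("x")
    arms = [x < 0, And(x >= 0, x < 10)]      # match arms as predicates

    s = Solver()
    s.add(Not(Or(arms)))                     # a value no arm matches?
    if s.check() == sat:
        print("non-exhaustive, e.g. x =", s.model()[x])   # e.g. x = 10

    for i, p in enumerate(arms):             # arm covered by earlier arms?
        s = Solver()
        s.add(p)
        if i:
            s.add(Not(Or(arms[:i])))
        if s.check() != sat:
            print("arm", i, "is redundant")
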
@Article{Bodden:2013:SLS,
author = "Eric Bodden and T{\'a}rsis Tol{\^e}do and M{\'a}rcio
Ribeiro and Claus Brabrand and Paulo Borba and Mira
Mezini",
title = "{SPL LIFT}: statically analyzing software product
lines in minutes instead of years",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "355--364",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2491976",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A software product line (SPL) encodes a potentially
large variety of software products as variants of some
common code base. Up until now, re-using traditional
static analyses for SPLs was virtually intractable, as
it required programmers to generate and analyze all
products individually. In this work, however, we show
how an important class of existing inter-procedural
static analyses can be transparently lifted to SPLs.
Without requiring programmers to change a single line
of code, our approach SPLLIFT automatically converts
any analysis formulated for traditional programs within
the popular IFDS framework for inter-procedural,
finite, distributive, subset problems to an SPL-aware
analysis formulated in the IDE framework, a well-known
extension to IFDS. Using a full implementation based on
Heros, Soot, CIDE and JavaBDD, we show that with
SPLLIFT one can reuse IFDS-based analyses without
changing a single line of code. Through experiments
using three static analyses applied to four Java-based
product lines, we were able to show that our approach
produces correct results and outperforms the
traditional approach by several orders of magnitude.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
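
The lifting itself is conceptually small: every dataflow fact is paired with a
feature constraint recording in which products it holds. A toy Python sketch,
with sets of products standing in for the BDDs SPLLIFT uses; all identifiers
are illustrative, not the tool's API.

    # Toy lifting of a gen-style transfer function to a product line:
    # facts carry feature constraints (sets of products here; SPLLIFT
    # uses BDDs over IDE edge functions).
    ALL_PRODUCTS = frozenset({"P1", "P2", "P3"})

    def lifted_transfer(facts, guard, gen):
        """facts: {fact: products in which it holds}; a statement
        guarded by an #ifdef only generates facts in those products."""
        out = dict(facts)
        cond = ALL_PRODUCTS if guard is None else guard
        for fact in gen:
            out[fact] = out.get(fact, frozenset()) | cond
        return out

    facts = lifted_transfer({}, None, {"x_tainted"})                 # plain code
    facts = lifted_transfer(facts, frozenset({"P1"}), {"y_tainted"}) # #ifdef'd
    print(facts)   # y_tainted is reported only for product P1
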
@Article{Zhang:2013:FOA,
author = "Xin Zhang and Mayur Naik and Hongseok Yang",
title = "Finding optimum abstractions in parametric dataflow
analysis",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "365--376",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462185",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a technique to efficiently search a large
family of abstractions in order to prove a query using
a parametric dataflow analysis. Our technique either
finds the cheapest such abstraction or shows that none
exists. It is based on counterexample-guided
abstraction refinement but applies a novel
meta-analysis on abstract counterexample traces to
efficiently find abstractions that are incapable of
proving the query. We formalize the technique in a
generic framework and apply it to two analyses: a
type-state analysis and a thread-escape analysis. We
demonstrate the effectiveness of the technique on a
suite of Java benchmark programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Apinis:2013:HCW,
author = "Kalmer Apinis and Helmut Seidl and Vesal Vojdani",
title = "How to combine widening and narrowing for
non-monotonic systems of equations",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "377--386",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462190",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Non-trivial analysis problems require complete
lattices with infinite ascending and descending chains.
In order to compute reasonably precise post-fixpoints
of the resulting systems of equations, Cousot and
Cousot have suggested accelerated fixpoint iteration by
means of widening and narrowing. The strict separation
into phases, however, may unnecessarily give up
precision that cannot be recovered later. While
widening is also applicable if equations are
non-monotonic, this is no longer the case for
narrowing. A narrowing iteration to improve a given
post-fixpoint, additionally, must assume that all
right-hand sides are monotonic. The latter assumption,
though, is not met in presence of widening. It is also
not met by equation systems corresponding to
context-sensitive interprocedural analysis, possibly
combining context-sensitive analysis of local
information with flow-insensitive analysis of globals.
As a remedy, we present a novel operator that combines
a given widening operator with a given narrowing
operator. We present adapted versions of round-robin as
well as of worklist iteration, local, and
side-effecting solving algorithms for the combined
operator and prove that the resulting solvers always
return sound results and are guaranteed to terminate
for monotonic systems whenever only finitely many
unknowns (constraint variables) are encountered.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
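
On the interval domain, the intent of the combined operator can be shown in a
few lines: widen while the right-hand side still grows, and switch to
narrowing as soon as it is bounded by the current value, in a single solver
loop with no separate phases. The Python sketch below is a simplification of
the paper's operator, for illustration only.

    # Interval-domain sketch of a combined widening/narrowing operator.
    INF = float("inf")

    def leq(a, b):                   # a below b? (None is bottom)
        return a is None or (b is not None and b[0] <= a[0] and a[1] <= b[1])

    def join(a, b):
        if a is None: return b
        if b is None: return a
        return (min(a[0], b[0]), max(a[1], b[1]))

    def widen(a, b):                 # unstable bounds jump to infinity
        if a is None: return b
        return (a[0] if a[0] <= b[0] else -INF,
                a[1] if a[1] >= b[1] else INF)

    def narrow(a, b):                # only infinite bounds are refined
        if a is None or b is None: return b
        return (b[0] if a[0] == -INF else a[0],
                b[1] if a[1] == INF else a[1])

    def combined(a, b):
        return narrow(a, b) if leq(b, a) else widen(a, b)

    def f(x):                        # abstract "i = 0; while i < 100: i += 1"
        return (0, 0) if x is None else join((0, 0), (x[0], min(x[1] + 1, 100)))

    x = None
    while True:
        nxt = combined(x, f(x))
        if nxt == x: break
        x = nxt
    print(x)  # (0, 100): widening overshoots to (0, inf), narrowing recovers
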
@Article{Swamy:2013:VHO,
author = "Nikhil Swamy and Joel Weinberger and Cole Schlesinger
and Juan Chen and Benjamin Livshits",
title = "Verifying higher-order programs with the {Dijkstra}
monad",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "387--398",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2491978",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern programming languages, ranging from Haskell and
ML, to JavaScript, C\# and Java, all make extensive use
of higher-order state. This paper advocates a new
verification methodology for higher-order stateful
programs, based on a new monad of predicate
transformers called the Dijkstra monad. Using the
Dijkstra monad has a number of benefits. First, the
monad naturally yields a weakest pre-condition
calculus. Second, the computed specifications are
structurally simpler in several ways, e.g.,
single-state post-conditions are sufficient (rather
than the more complex two-state post-conditions).
Finally, the monad can easily be varied to handle
features like exceptions and heap invariants, while
retaining the same type inference algorithm. We
implement the Dijkstra monad and its type inference
algorithm for the F* programming language. Our most
extensive case study evaluates the Dijkstra monad and
its F* implementation by using it to verify JavaScript
programs. Specifically, we describe a tool chain that
translates programs in a subset of JavaScript decorated
with assertions and loop invariants to F*. Once in F*,
our type inference algorithm computes verification
conditions and automatically discharges their proofs
using an SMT solver. We use our tools to prove that a
core model of the JavaScript runtime in F* respects
various invariants and that a suite of JavaScript
source programs are free of runtime errors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
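
The monad itself fits in a few lines once computations are identified with
their weakest-precondition transformers. A toy state-monad rendering in
Python follows; the encoding is illustrative only, since F*'s Dijkstra monad
lives at the type level rather than over runtime values.

    # Toy Dijkstra monad over integer state: a computation is its
    # weakest-precondition transformer wp(post)(s), where post(result,
    # state) is a boolean predicate.
    def ret(x):                     # wp(post)(s) = post(x, s)
        return lambda post: lambda s: post(x, s)

    def bind(m, f):                 # sequencing composes transformers
        return lambda post: m(lambda x, s: f(x)(post)(s))

    get = lambda post: lambda s: post(s, s)                  # read state
    put = lambda v: (lambda post: lambda s: post(None, v))   # write state

    # Program: x <- get; put (x + 1); return x
    prog = bind(get, lambda x: bind(put(x + 1), lambda _: ret(x)))

    # Single-state postcondition: result is the old state, state grew by 1.
    post = lambda r, s: s == r + 1
    print(prog(post)(41))   # True: the WP of the program holds at s = 41
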
@Article{Sergey:2013:MAI,
author = "Ilya Sergey and Dominique Devriese and Matthew Might
and Jan Midtgaard and David Darais and Dave Clarke and
Frank Piessens",
title = "Monadic abstract interpreters",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "399--410",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2491979",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent developments in the systematic construction of
abstract interpreters hinted at the possibility of a
broad unification of concepts in static analysis. We
deliver that unification by showing
context-sensitivity, polyvariance, flow-sensitivity,
reachability-pruning, heap-cloning and
cardinality-bounding to be independent of any
particular semantics. Monads become the unifying agent
between these concepts and between semantics. For
instance, plugging the same ``context-insensitivity
monad'' into a monadically-parameterized semantics for
Java or for the lambda calculus yields the expected
context-insensitive analysis. To achieve this
unification, we develop a systematic method for
transforming a concrete semantics into a
monadically-parameterized abstract machine. Changing
the monad changes the behavior of the machine. By
changing the monad, we recover a spectrum of
machines---from the original concrete semantics to a
monovariant, flow- and context-insensitive static
analysis with a singly-threaded heap and weak updates.
The monadic parameterization also suggests an
abstraction over the ubiquitous monotone fixed-point
computation found in static analysis. This abstraction
makes it straightforward to instrument an analysis with
high-level strategies for improving precision and
performance, such as abstract garbage collection and
widening. While the paper itself runs the development
for continuation-passing style, our generic
implementation replays it for direct-style
lambda-calculus and Featherweight Java to support
generality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
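
The punchline, that changing the monad changes the machine, can be seen on a
three-constructor expression language: the identity monad gives a concrete
run, the set monad a collecting analysis. A hypothetical Python sketch, far
simpler than the paper's abstract machines:

    # One evaluator, two monads: Identity runs concretely, NonDet
    # collects all possible results. All names are illustrative.
    class Identity:
        unit = staticmethod(lambda v: v)
        bind = staticmethod(lambda m, f: f(m))
        plus = staticmethod(lambda a, b: a)       # concrete run picks left

    class NonDet:
        unit = staticmethod(lambda v: {v})
        bind = staticmethod(lambda m, f: set().union(*(f(v) for v in m)))
        plus = staticmethod(lambda a, b: a | b)   # collect both branches

    def ev(e, M):
        tag = e[0]
        if tag == "lit":
            return M.unit(e[1])
        if tag == "add":
            return M.bind(ev(e[1], M), lambda a:
                   M.bind(ev(e[2], M), lambda b: M.unit(a + b)))
        if tag == "amb":                          # nondeterministic choice
            return M.plus(ev(e[1], M), ev(e[2], M))

    e = ("add", ("lit", 1), ("amb", ("lit", 10), ("lit", 20)))
    print(ev(e, Identity))   # 11
    print(ev(e, NonDet))     # {11, 21}
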
@Article{Czaplicki:2013:AFR,
author = "Evan Czaplicki and Stephen Chong",
title = "Asynchronous functional reactive programming for
{GUIs}",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "411--422",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462161",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graphical user interfaces (GUIs) mediate many of our
interactions with computers. Functional Reactive
Programming (FRP) is a promising approach to GUI
design, providing high-level, declarative,
compositional abstractions to describe user
interactions and time-dependent computations. We
present Elm, a practical FRP language focused on easy
creation of responsive GUIs. Elm has two major
features: simple declarative support for Asynchronous
FRP; and purely functional graphical layout.
Asynchronous FRP allows the programmer to specify when
the global ordering of event processing can be
violated, and thus enables efficient concurrent
execution of FRP programs; long-running computation can
be executed asynchronously and not adversely affect the
responsiveness of the user interface. Layout in Elm is
achieved using a purely functional declarative
framework that makes it simple to create and combine
text, images, and video into rich multimedia displays.
Together, Elm's two major features simplify the
complicated task of creating responsive and usable
GUIs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Kastrinis:2013:HCS,
author = "George Kastrinis and Yannis Smaragdakis",
title = "Hybrid context-sensitivity for points-to analysis",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "423--434",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462191",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Context-sensitive points-to analysis is valuable for
achieving high precision with good performance. The
standard flavors of context-sensitivity are
call-site-sensitivity (kCFA) and object-sensitivity.
Combining both flavors of context-sensitivity increases
precision but at an infeasibly high cost. We show that
a selective combination of call-site- and
object-sensitivity for Java points-to analysis is
highly profitable. Namely, by keeping a combined
context only when analyzing selected language features,
we can closely approximate the precision of an analysis
that keeps both contexts at all times. In terms of
speed, the selective combination of both kinds of
context not only vastly outperforms non-selective
combinations but is also faster than a mere
object-sensitive analysis. This result holds for a
large array of analyses (e.g., 1-object-sensitive,
2-object-sensitive with a context-sensitive heap,
type-sensitive) establishing a new set of
performance/precision sweet spots.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
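
The selective combination boils down to choosing, per call, which element to
push onto the k-limited context. A hypothetical Python sketch (the names and
the depth-2 limit are illustrative, not the paper's Datalog rules):

    # Selective hybrid context: object-sensitivity at virtual calls,
    # call-site-sensitivity at static calls, k-limited as usual.
    from collections import namedtuple

    Call = namedtuple("Call", "site is_virtual")

    def callee_context(call, caller_ctx, receiver_alloc, depth=2):
        elem = receiver_alloc if call.is_virtual else call.site
        return (caller_ctx + (elem,))[-depth:]

    ctx = ()
    ctx = callee_context(Call("main:7", False), ctx, None)      # static
    ctx = callee_context(Call("A.m:3", True), ctx, "new B@12")  # virtual
    print(ctx)   # ('main:7', 'new B@12'): a selectively mixed context
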
@Article{Zhang:2013:FAD,
author = "Qirun Zhang and Michael R. Lyu and Hao Yuan and
Zhendong Su",
title = "Fast algorithms for {Dyck--CFL}-reachability with
applications to alias analysis",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "435--446",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462159",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The context-free language (CFL) reachability problem
is a well-known fundamental formulation in program
analysis. In practice, many program analyses,
especially pointer analyses, adopt a restricted version
of CFL-reachability, Dyck-CFL-reachability, and compute
on edge-labeled bidirected graphs. Solving the
all-pairs Dyck-CFL-reachability on such bidirected
graphs is expensive. For a bidirected graph with n
nodes and m edges, the traditional dynamic programming
style algorithm exhibits a subcubic time complexity for
the Dyck language with k kinds of parentheses. When the
underlying graphs are restricted to bidirected trees,
an algorithm with O(n log n log k) time complexity was
proposed recently. This paper studies the
Dyck-CFL-reachability problems on bidirected trees and
graphs. In particular, it presents two fast algorithms
with O(n) and O(n + m log m) time complexities on trees
and graphs respectively. We have implemented and
evaluated our algorithms on a state-of-the-art alias
analysis for Java. Results on standard benchmarks show
that our algorithms achieve orders of magnitude speedup
and consume less memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
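
For bidirected graphs, Dyck-reachability admits a classic union-find scheme:
if two edges with the same open-parenthesis label leave the same equivalence
class, their targets are Dyck-inter-reachable and must be merged (matching
close parentheses are implicit in bidirectedness). The sketch below shows
that simple scheme, not the paper's faster O(n) and O(n + m log m) algorithms.

    def dyck_classes(n, edges):
        """edges: (u, label, v) meaning u --(label--> v."""
        parent = list(range(n))
        def find(x):
            while parent[x] != x:
                parent[x] = parent[parent[x]]
                x = parent[x]
            return x
        changed = True
        while changed:
            changed = False
            first_target = {}
            for u, lab, v in edges:
                key = (find(u), lab)
                w = first_target.get(key)
                if w is None:
                    first_target[key] = find(v)
                elif find(v) != find(w):
                    parent[find(v)] = find(w)   # merge the two classes
                    changed = True
        return [find(x) for x in range(n)]

    # a --("(1"--> b and a --("(1"--> c imply b and c inter-reachable.
    print(dyck_classes(3, [(0, "(1", 1), (0, "(1", 2)]))   # [0, 1, 1]
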
@Article{Sankaranarayanan:2013:SAP,
author = "Sriram Sankaranarayanan and Aleksandar Chakarov and
Sumit Gulwani",
title = "Static analysis for probabilistic programs: inferring
whole program properties from finitely many paths",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "447--458",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462179",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose an approach for the static analysis of
probabilistic programs that sense, manipulate, and
control based on uncertain data. Examples include
programs used in risk analysis, medical decision making
and cyber-physical systems. Correctness properties of
such programs take the form of queries that seek the
probabilities of assertions over program variables. We
present a static analysis approach that provides
guaranteed interval bounds on the values (assertion
probabilities) of such queries. First, we observe that
for probabilistic programs, it is possible to conclude
facts about the behavior of the entire program by
choosing a finite, adequate set of its paths. We
provide strategies for choosing such a set of paths and
verifying its adequacy. The queries are evaluated over
each path by a combination of symbolic execution and
probabilistic volume-bound computations. Each path
yields interval bounds that can be summed up with a
``coverage'' bound to yield an interval that encloses
the probability of assertion for the program as a
whole. We demonstrate promising results on a suite of
benchmarks from many different sources including
robotic manipulators and medical decision making
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
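
The bound computation at the end is simple arithmetic once the paths are in
hand: analyzed paths contribute the volume of their input regions, and the
unexplored mass widens the interval. A worked Python miniature (the regions
below are made up for illustration):

    # Path-wise probability bounds for x ~ Uniform(0, 1): explored
    # paths contribute their input volume; the "coverage" gap may or
    # may not satisfy the assertion, so it widens the interval.
    ok_paths  = [(0.0, 0.3), (0.5, 0.8)]   # regions proven to satisfy
    bad_paths = [(0.3, 0.5)]               # regions proven to violate

    ok  = sum(hi - lo for lo, hi in ok_paths)
    bad = sum(hi - lo for lo, hi in bad_paths)
    gap = 1.0 - ok - bad                   # input mass never explored
    print("P(assertion) in [%.2f, %.2f]" % (ok, ok + gap))  # [0.60, 0.80]
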
@Article{Liang:2013:MVL,
author = "Hongjin Liang and Xinyu Feng",
title = "Modular verification of linearizability with non-fixed
linearization points",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "459--470",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462189",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Locating linearization points (LPs) is an intuitive
approach for proving linearizability, but it is
difficult to apply the idea in Hoare-style logic for
formal program verification, especially for verifying
algorithms whose LPs cannot be statically located in
the code. In this paper, we propose a program logic
with a lightweight instrumentation mechanism which can
verify algorithms with non-fixed LPs, including the
most challenging ones that use the helping mechanism to
achieve lock-freedom (as in HSY elimination-based
stack), or have LPs depending on unpredictable future
executions (as in the lazy set algorithm), or involve
both features. We also develop a thread-local
simulation as the meta-theory of our logic, and show it
implies contextual refinement, which is equivalent to
linearizability. Using our logic we have successfully
verified various classic algorithms, some of which are
used in the java.util.concurrent package.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Sewell:2013:TVV,
author = "Thomas Arthur Leck Sewell and Magnus O. Myreen and
Gerwin Klein",
title = "Translation validation for a verified {OS} kernel",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "471--482",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462183",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We extend the existing formal verification of the seL4
operating system microkernel from 9500 lines of C
source code to the binary level. We handle all
functions that were part of the previous verification.
Like the original verification, we currently omit the
assembly routines and volatile accesses used to control
system hardware. More generally, we present an approach
for proving refinement between the formal semantics of
a program on the C source level and its formal
semantics on the binary level, thus checking the
validity of compilation, including some optimisations,
and linking, and extending static properties proved of
the source code to the executable. We make use of
recent improvements in SMT solvers to almost fully
automate this process. We handle binaries generated by
unmodified gcc 4.5.1 at optimisation level 1, and can
handle most of seL4 even at optimisation level 2.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
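
The flavor of the refinement check shows up already on a micro-example:
prove that a source expression and its "compiled" form agree on all inputs by
asking an SMT solver for a counterexample. The sketch assumes Z3's Python
bindings; the actual seL4 proof works on whole compiled functions through
Isabelle/HOL plus SMT.

    # Translation validation in miniature: the strength reduction
    # x*8 -> x<<3 is sound for 32-bit words iff no counterexample exists.
    from z3 import BitVec, Solver, sat

    x = BitVec("x", 32)
    source   = x * 8
    compiled = x << 3

    s = Solver()
    s.add(source != compiled)
    print("refinement holds" if s.check() != sat else s.model())
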
@Article{Guha:2013:MVN,
author = "Arjun Guha and Mark Reitblatt and Nate Foster",
title = "Machine-verified network controllers",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "483--494",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462178",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In many areas of computing, techniques ranging from
testing to formal modeling to full-blown verification
have been successfully used to help programmers build
reliable systems. But although networks are critical
infrastructure, they have largely resisted analysis
using formal techniques. Software-defined networking
(SDN) is a new network architecture that has the
potential to provide a foundation for network
reasoning, by standardizing the interfaces used to
express network programs and giving them a precise
semantics. This paper describes the design and
implementation of the first machine-verified SDN
controller. Starting from the foundations, we develop a
detailed operational model for OpenFlow (the most
popular SDN platform) and formalize it in the Coq proof
assistant. We then use this model to develop a verified
compiler and run-time system for a high-level network
programming language. We identify bugs in existing
languages and tools built without formal foundations,
and prove that these bugs are absent from our system.
Finally, we describe our prototype implementation and
our experiences using it to build practical
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Nowatzki:2013:GCC,
author = "Tony Nowatzki and Michael Sartin-Tarm and Lorenzo {De
Carli} and Karthikeyan Sankaralingam and Cristian Estan
and Behnam Robatmili",
title = "A general constraint-centric scheduling framework for
spatial architectures",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "495--506",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462163",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Specialized execution using spatial architectures
provides energy efficient computation, but requires
effective algorithms for spatially scheduling the
computation. Generally, this has been solved with
architecture-specific heuristics, an approach that
suffers from poor compiler/architect productivity and a
lack of insight into optimality, and inhibits migration of
techniques between architectures. Our goal is to
develop a scheduling framework usable for all spatial
architectures. To this end, we express spatial
scheduling as a constraint satisfaction problem using
Integer Linear Programming (ILP). We observe that
architecture primitives and scheduler responsibilities
can be related through five abstractions: placement of
computation, routing of data, managing event timing,
managing resource utilization, and forming the
optimization objectives. We encode these
responsibilities as 20 general ILP constraints, which
are used to create schedulers for the disparate TRIPS,
DySER, and PLUG architectures. Our results show that a
general declarative approach using ILP is
implementable, practical, and typically matches or
outperforms specialized schedulers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
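
The declarative formulation separates the constraints from the search. A toy
stand-in for the ILP solver: place a three-node dataflow graph onto a 2x2
grid of processing elements, one op per PE (a utilization constraint), while
minimizing routing distance. Everything here is hypothetical; the paper's
framework encodes 20 ILP constraint classes instead of brute force.

    from itertools import permutations

    ops = ["a", "b", "c"]
    deps = [("a", "c"), ("b", "c")]          # data edges to route
    pes = [(x, y) for x in range(2) for y in range(2)]

    def cost(place):                         # total Manhattan routing
        return sum(abs(place[u][0] - place[v][0]) +
                   abs(place[u][1] - place[v][1]) for u, v in deps)

    best = min((dict(zip(ops, p)) for p in permutations(pes, len(ops))),
               key=cost)
    print(best, cost(best))   # a and b placed adjacent to c, distance 2
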
@Article{Lifflander:2013:STL,
author = "Jonathan Lifflander and Sriram Krishnamoorthy and
Laxmikant V. Kale",
title = "{Steal Tree}: low-overhead tracing of work stealing
schedulers",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "507--518",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462193",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Work stealing is a popular approach to scheduling
task-parallel programs. The flexibility inherent in
work stealing when dealing with load imbalance results
in seemingly irregular computation structures,
complicating the study of its runtime behavior. In this
paper, we present an approach to efficiently trace
async-finish parallel programs scheduled using work
stealing. We identify key properties that allow us to
trace the execution of tasks with low time and space
overheads. We also study the usefulness of the proposed
schemes in supporting algorithms for data-race
detection and retentive stealing presented in the
literature. We demonstrate that the perturbation due to
tracing is within the variation in the execution time
with 99\% confidence and the traces are concise,
amounting to a few tens of kilobytes per thread in most
cases. We also demonstrate that the traces enable
significant reductions in the cost of detecting data
races and result in low, stable space overheads in
supporting retentive stealing for async-finish
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
@Article{Ragan-Kelley:2013:HLC,
author = "Jonathan Ragan-Kelley and Connelly Barnes and Andrew
Adams and Sylvain Paris and Fr{\'e}do Durand and Saman
Amarasinghe",
title = "{Halide}: a language and compiler for optimizing
parallelism, locality, and recomputation in image
processing pipelines",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "519--530",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462176",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Image processing pipelines combine the challenges of
stencil computations and stream programs. They are
composed of large graphs of different stencil stages,
as well as complex reductions, and stages with global
or data-dependent access patterns. Because of their
complex structure, the performance difference between a
naive implementation of a pipeline and an optimized one
is often an order of magnitude. Efficient
implementations require optimization of both
parallelism and locality, but due to the nature of
stencils, there is a fundamental tension between
parallelism, locality, and introducing redundant
recomputation of shared values. We present a systematic
model of the tradeoff space fundamental to stencil
pipelines, a schedule representation which describes
concrete points in this space for each stage in an
image processing pipeline, and an optimizing compiler
for the Halide image processing language that
synthesizes high performance implementations from a
Halide algorithm and a schedule. Combining this
compiler with stochastic search over the space of
schedules enables terse, composable programs to achieve
state-of-the-art performance on a wide range of real
image processing pipelines, and across different
hardware architectures, including multicores with SIMD,
and heterogeneous CPU+GPU execution. From simple Halide
programs written in a few hours, we demonstrate
performance up to 5x faster than hand-tuned C,
intrinsics, and CUDA implementations optimized by
experts over weeks or months, for image processing
applications beyond the reach of past automatic
compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '13 conference proceedings.",
}
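
Halide's central move, separating what a stage computes from when and where
it is computed, can be miniaturized to a 1-D blur with a tile-size "schedule"
knob. The Python below is a toy rendering, not Halide's API: small tiles
improve locality at the cost of recomputing the producer's halo values.

    # Toy separation of algorithm and schedule for a 1-D blur.
    # Algorithm (fixed): blur(x) = (inp(x) + inp(x+1)) / 2.
    # Schedule (a knob): the tile size trades locality for recompute.
    def blur_scheduled(inp, n, tile):
        out = [0.0] * n
        for t0 in range(0, n, tile):
            hi = min(t0 + tile, n)
            # Producer computed per tile: the +1 halo element is
            # recomputed at tile boundaries -- the tension the paper's
            # schedule space navigates.
            producer = {x: inp(x) for x in range(t0, hi + 1)}
            for x in range(t0, hi):
                out[x] = (producer[x] + producer[x + 1]) / 2.0
        return out

    inp = lambda x: float(x * x)
    print(blur_scheduled(inp, 8, tile=4))
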
@Article{Jia:2013:SID,
author = "Ning Jia and Chun Yang and Jing Wang and Dong Tong and
Keyi Wang",
title = "{SPIRE}: improving dynamic binary translation through
{SPC}-indexed indirect branch redirecting",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "1--12",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451516",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Dynamic binary translation system must perform an
address translation for every execution of indirect
branch instructions. The procedure to convert Source
binary Program Counter (SPC) address to Translated
Program Counter (TPC) address always takes more than 10
instructions, becoming a major source of performance
overhead. This paper proposes a novel mechanism called
SPc-Indexed REdirecting (SPIRE), which can
significantly reduce the indirect branch handling
overhead. SPIRE does not rely on a hash lookup or an
address mapping table to perform address translation. It reuses
the source binary code space to build an SPC-indexed
redirecting table. This table can be indexed directly
by SPC address without hashing. With SPIRE, the
indirect branch can jump to the original SPC address
without address translation. The trampoline residing in
the SPC address will redirect the control flow to
the related code cache. Only 2--6 instructions are needed to
handle an indirect branch execution. Since part of the
source binary is overwritten, a shadow page
mechanism is employed to preserve the transparency of the
overwritten source binary code page. Online profiling is
adopted to reduce the memory overhead. We have
implemented SPIRE on an x86 to x86 DBT system, and
discussed the implementation issues on different guest
and host architectures. The experiments show that,
compared with hash lookup mechanism, SPIRE can reduce
the performance overhead by 36.2\% on average, up to
51.4\%, while only 5.6\% extra memory is needed. SPIRE
can cooperate with other indirect branch handling
mechanisms easily, and we believe the idea of SPIRE can
also be applied on other occasions that need address
translation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{vonKoch:2013:LRB,
author = "Tobias J. K. Edler von Koch and Bj{\"o}rn Franke",
title = "Limits of region-based dynamic binary
parallelization",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "13--22",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Efficiently executing sequential legacy binaries on
chip multi-processors (CMPs) composed of many, small
cores is one of today's most pressing problems.
Single-threaded execution is a suboptimal option due to
CMPs' lower single-core performance, while
multi-threaded execution relies on prior
parallelization, which is severely hampered by the
low-level binary representation of applications
compiled and optimized for a single-core target. A
recent technology to address this problem is Dynamic
Binary Parallelization (DBP), which creates a Virtual
Execution Environment (VEE) taking advantage of the
underlying multicore host to transparently parallelize
the sequential binary executable. While still in its
infancy, DBP has received broad interest within the
research community. The combined use of DBP and
thread-level speculation (TLS) has been proposed as a
technique to accelerate legacy uniprocessor code on
modern CMPs. In this paper, we investigate the limits
of DBP and seek to gain an understanding of the factors
contributing to these limits and the costs and
overheads of its implementation. We have performed an
extensive evaluation using a parameterizable DBP system
targeting a CMP with light-weight architectural TLS
support. We demonstrate that there is room for a
significant reduction of up to 54\% in the number of
instructions on the critical paths of legacy SPEC
CPU2006 benchmarks. However, we show that it is much
harder to translate these savings into actual
performance improvements, with a realistic
hardware-supported implementation achieving a speedup
of 1.09 on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hsu:2013:IDB,
author = "Chun-Chen Hsu and Pangfeng Liu and Jan-Jan Wu and
Pen-Chung Yew and Ding-Yong Hong and Wei-Chung Hsu and
Chien-Min Wang",
title = "Improving dynamic binary optimization through
early-exit guided code region formation",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "23--32",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451519",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Most dynamic binary translators (DBT) and optimizers
(DBO) target binary traces, i.e. frequently executed
paths, as code regions to be translated and optimized.
Code region formation is the most important first step
in all DBTs and DBOs. The quality of the dynamically
formed code regions determines the extent and the types
of optimization opportunities that can be exposed to
DBTs and DBOs, and thus, determines the ultimate
quality of the final optimized code. The
Next-Executing-Tail (NET) trace formation method used
in HP Dynamo is an early example of such techniques.
Many existing trace formation schemes are variants of
NET. They work very well for most binary traces, but
they also suffer from a major problem: the formed traces may
contain a large number of early exits that may be
taken during execution. If this happens
frequently, the program execution will spend more time
in the slow binary interpreter or in the unoptimized
code regions than in the optimized traces in code
cache. The benefit of the trace optimization is thus
lost. Traces/regions with frequently taken early-exits
are called delinquent traces/regions. Our empirical
study shows that at least 8 of the 12 SPEC CPU2006
integer benchmarks have delinquent traces. In this
paper, we propose a light-weight region formation
technique called Early-Exit Guided Region Formation
(EEG) to improve the quality of the formed
traces/regions. It iteratively identifies and merges
delinquent regions into larger code regions. We have
implemented our EEG algorithm in two LLVM-based
multi-threaded DBTs targeting ARM and IA32 instruction
set architecture (ISA), respectively. Using SPEC
CPU2006 benchmark suite with reference inputs, our
results show that compared to an NET-variant currently
used in QEMU, a state-of-the-art retargetable DBT, EEG
can achieve a significant performance improvement of up
to 72\% (27\% on average) and up to 49\% (23\% on
average) for IA32 and ARM, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kaufmann:2013:SCO,
author = "Marco Kaufmann and Rainer G. Spallek",
title = "Superblock compilation and other optimization
techniques for a {Java}-based {DBT} machine emulator",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "33--40",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451521",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Superblock compilation techniques such as control flow
graph (CFG) or trace compilation have become a widely
adopted approach to increase the performance of
dynamically compiling virtual machines even further.
While this was shown to be successful for many
conventional virtual machines, it did not result in a
higher performance for Java-based DBT machine emulators
so far. These emulators dynamically translate
application binaries of a target machine into Java
bytecode, which is then eventually compiled into the
native code of the emulating host by the Java Virtual
Machine (JVM). Successful superblock compilation
techniques for this class of emulators must consider
the special requirements that result from the two-stage
translation as well as the characteristics of the JVM,
such as the inability of most Java JIT compilers to
handle large bytecode methods efficiently. In this
paper, we present a superblock compilation approach for
a Java-based DBT machine emulator that generates a
performance increase of up to 90 percent and of 32
percent on average. The key idea of our design is to
provide a large scope over the control flow of target
applications across basic block boundaries for the JVM,
while still keeping small bytecode methods for the
execution units. In addition, we also present two
further optimizations --- interpreter context
elimination and program counter elimination --- which
increase the emulation performance by a further 16 percent.
In total, the optimization techniques discussed in this
paper provide an average performance gain of 48 percent
for the surveyed emulator.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jo:2013:ELM,
author = "Changyeon Jo and Erik Gustafsson and Jeongseok Son and
Bernhard Egger",
title = "Efficient live migration of virtual machines using
shared storage",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "41--50",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451524",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Live migration of virtual machines (VM) across
distinct physical hosts is an important feature of
virtualization technology for maintenance,
load-balancing and energy reduction, especially so for
data centers operators and cluster service providers.
Several techniques have been proposed to reduce the
downtime of the VM being transferred, often at the
expense of the total migration time. In this work, we
present a technique to reduce the total time required
to migrate a running VM from one host to another while
keeping the downtime to a minimum. Based on the
observation that modern operating systems use the
better part of the physical memory to cache data from
secondary storage, our technique tracks the VM's I/O
operations to the network-attached storage device and
maintains an updated mapping of memory pages that
currently reside in identical form on the storage
device. During the iterative pre-copy live migration
process, instead of transferring those pages from the
source to the target host, the memory-to-disk mapping
is sent to the target host which then fetches the
contents directly from the network-attached storage
device. We have implemented our approach into the Xen
hypervisor and ran a series of experiments with Linux
HVM guests. The presented technique shows a
reduction of over 30\% on average in the total
transfer time for a series of benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
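
The mechanism fits in one loop: during each iterative pre-copy round, pages
known to be clean copies of blocks on the network-attached storage are sent
as page-to-block mappings rather than as contents. A hypothetical Python
simplification (all structures are illustrative, not the Xen patch):

    # Shared-storage-assisted pre-copy: ship mappings for disk-backed
    # pages, ship contents only for the rest.
    def precopy_round(dirty_pages, disk_map):
        wire, mappings = [], []
        for page, content in dirty_pages.items():
            block = disk_map.get(page)
            if block is not None:
                mappings.append((page, block))  # target fetches from NAS
            else:
                wire.append((page, content))    # full contents over network
        return wire, mappings

    dirty = {0: b"kernel", 1: b"cached-file-data", 2: b"heap"}
    disk  = {1: 7042}                           # page 1 == storage block 7042
    wire, maps = precopy_round(dirty, disk)
    print(len(wire), "pages sent,", len(maps), "fetched from storage")
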
@Article{Chiang:2013:IBM,
author = "Jui-Hao Chiang and Han-Lin Li and Tzi-cker Chiueh",
title = "Introspection-based memory de-duplication and
migration",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "51--62",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451525",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Memory virtualization abstracts a physical machine's
memory resource and presents to the virtual machines
running on it a piece of physical memory that could be
shared, compressed and moved. To optimize the memory
resource utilization by fully leveraging the
flexibility afforded by memory virtualization, it is
essential that the hypervisor have some sense of how
the guest VMs use their allocated physical memory. One
way to do this is virtual machine introspection (VMI),
which interprets byte values in a guest memory space
into semantically meaningful data structures. However,
identifying a guest VM's memory usage information such
as free memory pool is non-trivial. This paper
describes a bootstrapping VM introspection technique
that could accurately extract free memory pool
information from multiple versions of Windows and Linux
without kernel version-specific hard-coding, how to
apply this technique to improve the efficiency of
memory de-duplication and memory state migration, and
the resulting improvement in memory de-duplication
speed, gain in additional memory pages de-duplicated,
and reduction in traffic loads associated with memory
state migration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
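
At its core, memory de-duplication groups pages by content hash and keeps one
copy; the introspected free-pool information lets the scan skip free pages
entirely. A minimal Python sketch, with the free set assumed to come from
VMI (all names hypothetical):

    import hashlib

    # Core of memory de-duplication: hash page contents, share duplicates.
    def dedup(pages, free_pages):
        canonical, shared = {}, {}
        for pfn, content in pages.items():
            if pfn in free_pages:
                continue                      # no point deduplicating
            h = hashlib.sha1(content).digest()
            if h in canonical:
                shared[pfn] = canonical[h]    # map to the existing copy
            else:
                canonical[h] = pfn
        return shared

    pages = {0: b"A" * 4096, 1: b"A" * 4096, 2: b"B" * 4096, 3: b"junk"}
    print(dedup(pages, free_pages={3}))       # {1: 0}
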
@Article{Cui:2013:VMV,
author = "Lei Cui and Jianxin Li and Bo Li and Jinpeng Huai and
Chunming Hu and Tianyu Wo and Hussain Al-Aqrabi and Lu
Liu",
title = "{VMScatter}: migrate virtual machines to many hosts",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "63--72",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451528",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Live virtual machine migration is a technique often
used to migrate an entire OS with running applications
in a non-disruptive fashion. Prior work has focused on
one-to-one live migration, and many techniques have
been proposed, such as pre-copy, post-copy, and
log/replay. In contrast, we propose VMScatter, a
one-to-many migration method to migrate virtual
machines from one to many other hosts simultaneously.
First, by merging the identical pages within or across
virtual machines, VMScatter multicasts only a single
copy of these pages to associated target hosts for
avoiding redundant transmission. This is impactful
practically when the same OS and similar applications
running in the virtual machines where there are plenty
of identical pages. Second, we introduce a novel
grouping algorithm to decide the placement of virtual
machines, distinguished from the previous schedule
algorithms which focus on the workload for load balance
or power saving, we also focus on network traffic,
which is a critical metric in data-intensive data
centers. Third, we schedule the multicast sequence of
packets to reduce the network overhead introduced by
joining or quitting the multicast groups of target
hosts. Compared to traditional live migration technique
in QEMU/KVM, VMScatter reduces 74.2\% of the total
transferred data, 69.1\% of the total migration time
and achieves the network traffic reduction from 50.1\%
to 70.3\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhou:2013:OVM,
author = "Ruijin Zhou and Fang Liu and Chao Li and Tao Li",
title = "Optimizing virtual machine live storage migration in
heterogeneous storage environment",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "73--84",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451529",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Virtual machine (VM) live storage migration techniques
significantly increase the mobility and manageability
of virtual machines in the era of cloud computing. On
the other hand, as solid state drives (SSDs) become
increasingly popular in data centers, VM live storage
migration will inevitably encounter heterogeneous
storage environments. Nevertheless, conventional
migration mechanisms do not consider the speed
discrepancy and SSD's wear-out issue, which not only
causes significant performance degradation but also
shortens SSD's lifetime. This paper, for the first
time, addresses the efficiency of VM live storage
migration in heterogeneous storage environments from a
multi-dimensional perspective, i.e., user experience,
device wearing, and manageability. We derive a flexible
metric (migration cost), which captures various design
preferences. Based on this metric, we propose and prototype
three new storage migration strategies, namely: (1) Low
Redundancy (LR), which generates the least amount of
redundant writes; (2) Source-based Low Redundancy
(SLR), which keeps the balance between IO performance
and write redundancy; and (3) Asynchronous IO
Mirroring, which seeks the highest IO performance. The
evaluation of our prototyped system shows that our
techniques outperform existing live storage migration
by a significant margin. Furthermore, by adaptively
mixing our proposed schemes, the cost of massive VM
live storage migration can be even lower than that of
only using the best individual mechanism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Song:2013:PLM,
author = "Xiang Song and Jicheng Shi and Ran Liu and Jian Yang
and Haibo Chen",
title = "Parallelizing live migration of virtual machines",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "85--96",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Live VM migration is one of the major primitive
operations to manage virtualized cloud platforms. Such
an operation is usually mission-critical and disruptive to
the running services, and thus should be completed as
fast as possible. Unfortunately, with the increasing
amount of resources configured to a VM, such operations
are becoming increasingly time-consuming. In this
paper, we make a comprehensive analysis on the
parallelization opportunities of live VM migration on
two popular open-source VMMs (i.e., Xen and KVM). By
leveraging abundant resources like CPU cores and NICs
in contemporary server platforms, we design and
implement a system called PMigrate that leverages data
parallelism and pipeline parallelism to parallelize the
operation. As the parallelization framework requires
intensive mmap/munmap operations that tax the address
space management system in an operating system, we
further propose an abstraction called range lock, which
improves scalability of concurrent mutation to the
address space of an operating system (i.e., Linux) by
selectively replacing the per-process address space
lock inside kernel with dynamic and fine-grained range
locks that exclude costly operations on the requesting
address range from using the per-process lock.
Evaluation with our working prototype on Xen and KVM
shows that PMigrate accelerates live VM migration by
factors ranging from 2.49X to 9.88X, and decreases the
downtime by factors ranging from 1.9X to 279.89X.
Performance analysis shows that our integration of range locks into Linux
significantly improves parallelism in mutating the
address space in VM migration and thus boosts the
performance by factors ranging from 2.06X to 3.05X. We also show
that PMigrate causes only minor disruption to other
co-hosted production VMs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
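
The range-lock idea stands alone: track locked [lo, hi) intervals and block
only on overlap, so concurrent mmap/munmap-style mutations of disjoint ranges
proceed in parallel. A simplified Python sketch with threading.Condition,
not the paper's kernel implementation:

    import threading

    class RangeLock:
        def __init__(self):
            self.cv = threading.Condition()
            self.held = []                    # list of (lo, hi) ranges

        def _overlaps(self, lo, hi):
            return any(lo < h and l < hi for l, h in self.held)

        def acquire(self, lo, hi):
            with self.cv:
                while self._overlaps(lo, hi): # serialize only on overlap
                    self.cv.wait()
                self.held.append((lo, hi))

        def release(self, lo, hi):
            with self.cv:
                self.held.remove((lo, hi))
                self.cv.notify_all()

    rl = RangeLock()
    rl.acquire(0x1000, 0x2000)
    rl.acquire(0x3000, 0x4000)   # disjoint: would not block another thread
    rl.release(0x1000, 0x2000); rl.release(0x3000, 0x4000)
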
@Article{Fu:2013:EUD,
author = "Yangchun Fu and Zhiqiang Lin",
title = "{EXTERIOR}: using a dual-{VM} based external shell for
guest-{OS} introspection, configuration, and recovery",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "97--110",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "This paper presents EXTERIOR, a dual-VM architecture
based external shell that can be used for trusted,
timely out-of-VM management of guest-OS such as
introspection, configuration, and recovery. Inspired by
recent advances in virtual machine introspection (VMI),
EXTERIOR leverages an isolated, secure virtual machine
(SVM) to introspect the kernel state of a guest virtual
machine (GVM). However, it goes far beyond the
read-only capability of the traditional VMI, and can
perform automatic, fine-grained guest-OS writable
operations. The key idea of EXTERIOR is to use a
dual-VM architecture in which a SVM runs a kernel
identical to that of the GVM to create the necessary
environment for a running process (e.g., {\tt rmmod},
{\rr bkill}), and dynamically and transparently
redirect and update the memory state at the VMM layer
from SVM to GVM, thereby achieving the same effect in
terms of kernel state updates of running the same
trusted in-VM program inside the shell of GVM. A
proof-of-concept EXTERIOR has been implemented. The
experimental results show that EXTERIOR can be used for
a timely administration of guest-OS, including
introspection and (re)configuration of the guest-OS
state and timely response of kernel malware intrusions,
without any user account in the guest-OS.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dai:2013:LVM,
author = "Yuehua Dai and Yong Qi and Jianbao Ren and Yi Shi and
Xiaoguang Wang and Xuan Yu",
title = "A lightweight {VMM} on many core for high performance
computing",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "111--120",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451535",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Traditional Virtual Machine Monitor (VMM) virtualizes
some devices and instructions, which induces
performance overhead to guest operating systems.
Furthermore, the virtualization contributes a large
amount of codes to VMM, which makes a VMM prone to bugs
and vulnerabilities. On the other hand, in cloud
computing, cloud service provider configures virtual
machines based on requirements which are specified by
customers in advance. As resources in a multi-core
server increase to more than adequate in the future,
virtualization is not necessary although it provides
convenience for cloud computing. Based on the above
observations, this paper presents an alternative way
for constructing a VMM: configuring a booting interface
instead of virtualization technology. A lightweight
virtual machine monitor --- OSV is proposed based on
this idea. OSV can host multiple full functional Linux
kernels with little performance overhead. There are
only 6 hyper-calls in OSV. The Linux running on top of
OSV is intercepted only for the inter-processor
interrupts. The resource isolation is implemented with
hardware-assist virtualization. The resource sharing is
controlled by distributed protocols embedded in current
operating systems. We implement a prototype of OSV on
AMD Opteron processor based 32-core servers with SVM
and cache-coherent NUMA architectures. OSV can host up
to 8 Linux kernels on the server with less than 10
lines of code modifications to Linux kernel. OSV has
about 8000 lines of code which can be easily tuned and
debugged. The experiment results show that OSV VMM has
23.7\% performance improvement compared with Xen VMM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yamada:2013:TFT,
author = "Hiroshi Yamada and Kenji Kono",
title = "Traveling forward in time to newer operating systems
using {ShadowReboot}",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "121--130",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451536",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Operating system (OS) reboots are an essential part of
updating kernels and applications on laptops and
desktop PCs. Long downtime during OS reboots severely
disrupts users' computational activities. This long
disruption discourages the users from conducting OS
reboots, failing to enforce them to conduct software
updates. This paper presents ShadowReboot, a virtual
machine monitor (VMM)-based approach that shortens
downtime of OS reboots in software updates.
ShadowReboot conceals OS reboot activities from user's
applications by spawning a VM dedicated to an OS reboot
and systematically producing the rebooted state where
the updated kernel and applications are ready for use.
ShadowReboot provides an illusion to the users that the
guest OS travels forward in time to the rebooted state.
ShadowReboot offers the following advantages. It can be
used to apply patches to the kernels and even system
configuration updates. Next, it does not require any
special patch requiring detailed knowledge about the
target kernels. Lastly, it does not require any target
kernel modification. We implemented a prototype in
VirtualBox 4.0.10 OSE. Our experimental results show
that ShadowReboot successfully updated software on
unmodified commodity OS kernels and shortened the
downtime of commodity OS reboots on five Linux
distributions (Fedora, Ubuntu, Gentoo, Cent, and SUSE)
by 91 to 98\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jantz:2013:PPO,
author = "Michael R. Jantz and Prasad A. Kulkarni",
title = "Performance potential of optimization phase selection
during dynamic {JIT} compilation",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "131--142",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Phase selection is the process of customizing the
applied set of compiler optimization phases for
individual functions or programs to improve the performance
of generated code. Researchers have recently developed
novel feature-vector based heuristic techniques to
perform phase selection during online JIT compilation.
While these heuristics improve program startup speed,
steady-state performance was not seen to benefit over
the default fixed single sequence baseline.
Unfortunately, it is still not conclusively known
whether this lack of steady-state performance gain is
due to a failure of existing online phase selection
heuristics, or because there is, indeed, little or no
speedup to be gained by phase selection in online JIT
environments. The goal of this work is to resolve this
question, while examining the phase selection related
behavior of optimizations, and assessing and improving
the effectiveness of existing heuristic solutions. We
conduct experiments to find and understand the potency
of the factors that can cause the phase selection
problem in JIT compilers. Next, using long-running
genetic algorithms we determine that program-wide and
method-specific phase selection in the HotSpot JIT
compiler can produce ideal steady-state performance
gains of up to 15\% (4.3\% average) and 44\% (6.2\%
average), respectively. We also find that existing
state-of-the-art heuristic solutions are unable to
realize these performance gains (in our experimental
setup), discuss possible causes, and show that
exploiting knowledge of optimization phase behavior can
help improve such heuristic solutions. Our work
develops a robust open-source production-quality
framework using the HotSpot JVM to further explore this
problem in the future.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lameed:2013:MAS,
author = "Nurudeen A. Lameed and Laurie J. Hendren",
title = "A modular approach to on-stack replacement in {LLVM}",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "143--154",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451541",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/matlab.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "On-stack replacement (OSR) is a technique that allows
a virtual machine to interrupt running code during the
execution of a function/method, to re-optimize the
function on-the-fly using an optimizing JIT compiler,
and then to resume the interrupted function at the
point and state at which it was interrupted. OSR is
particularly useful for programs with potentially
long-running loops, as it allows dynamic optimization
of those loops as soon as they become hot. This paper
presents a modular approach to implementing OSR for the
LLVM compiler infrastructure. This is an important step
forward because LLVM is gaining popular support, and
adding the OSR capability allows compiler developers to
develop new dynamic techniques. In particular, it will
enable more sophisticated LLVM-based JIT compiler
approaches. Indeed, other compiler/VM developers can
use our approach because it is a clean modular addition
to the standard LLVM distribution. Further, our
approach is defined completely at the LLVM-IR level and
thus does not require any modifications to the target
code generation. The OSR implementation can be used by
different compilers to support a variety of dynamic
optimizations. As a demonstration of our OSR approach,
we have used it to support dynamic inlining in McVM.
McVM is a virtual machine for MATLAB that uses an
                 LLVM-based JIT compiler. MATLAB is a popular dynamic
language for scientific and engineering applications
that typically manipulate large matrices and often
contain long-running loops, and is thus an ideal target
for dynamic JIT compilation and OSR. Using our McVM
example, we demonstrate reasonable overheads for our
benchmark set, and performance improvements when using
it to perform dynamic inlining.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jantz:2013:FAG,
author = "Michael R. Jantz and Carl Strickland and Karthik Kumar
and Martin Dimitrov and Kshitij A. Doshi",
title = "A framework for application guidance in virtual memory
systems",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "155--166",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451543",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "This paper proposes a collaborative approach in which
applications can provide guidance to the operating
system regarding allocation and recycling of physical
memory. The operating system incorporates this guidance
to decide which physical page should be used to back a
particular virtual page. The key intuition behind this
approach is that application software, as a generator
of memory accesses, is best equipped to inform the
operating system about the relative access rates and
overlapping patterns of usage of its own address space.
It is also capable of steering its own algorithms in
order to keep its dynamic memory footprint in check
when there is a need to reduce power or to contain the
spillover effects from bursts in demand. Application
software, working cooperatively with the operating
system, can therefore help the latter schedule memory
more effectively and efficiently than when the
operating system is forced to act alone without such
guidance. It is particularly difficult to achieve power
efficiency without application guidance since power
expended in memory is a function not merely of the
intensity with which memory is accessed in time but
also of how many physical ranks are affected by an
application's memory usage. Our framework introduces an
abstraction called ``colors'' for the application to
communicate its intent to the operating system. We
modify the operating system to receive this
communication in an efficient way, and to organize
physical memory pages into intermediate level grouping
structures called ``trays'' which capture the
physically independent access channels and self-refresh
domains, so that it can apply this guidance without
entangling the application in lower level details of
power or bandwidth management. This paper describes how
we re-architect the memory management of a recent Linux
kernel to realize a three way collaboration between
hardware, supervisory software, and application
tasks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chen:2013:TVR,
author = "Chen Chen and Petros Maniatis and Adrian Perrig and
Amit Vasudevan and Vyas Sekar",
title = "Towards verifiable resource accounting for outsourced
computation",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "167--178",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451546",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Outsourced computation services should ideally only
charge customers for the resources used by their
applications. Unfortunately, no verifiable basis for
service providers and customers to reconcile resource
accounting exists today. This leads to undesirable
outcomes for both providers and consumers: providers
cannot prove to customers that they really devoted the
resources charged, and customers cannot verify that
their invoice maps to their actual usage. As a result,
many practical and theoretical attacks exist, aimed at
charging customers for resources that their
applications did not consume. Moreover, providers
cannot charge consumers precisely, which causes them to
bear the cost of unaccounted resources or pass these
costs inefficiently to their customers. We introduce
ALIBI, a first step toward a vision for verifiable
resource accounting. ALIBI places a minimal, trusted
reference monitor underneath the service provider's
software platform. This monitor observes resource
allocation to customers' guest virtual machines and
reports those observations to customers, for verifiable
reconciliation. In this paper, we show that ALIBI
efficiently and verifiably tracks guests' memory use
and CPU-cycle consumption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhou:2013:LPC,
author = "Ruijin Zhou and Tao Li",
title = "Leveraging phase change memory to achieve efficient
virtual machine execution",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "179--190",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451547",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "Virtualization technology is being widely adopted by
servers and data centers in the cloud computing era to
improve resource utilization and energy efficiency.
Nevertheless, the heterogeneous memory demands from
multiple virtual machines (VM) make it more challenging
to design efficient memory systems. Even worse,
                 mission-critical VM management activities (e.g.,
                 checkpointing)
could incur significant runtime overhead due to
intensive IO operations. In this paper, we propose to
leverage the adaptable and non-volatile features of the
emerging phase change memory (PCM) to achieve efficient
virtual machine execution. Towards this end, we exploit
VM-aware PCM management mechanisms, which (1) smartly
tune SLC/MLC page allocation within a single VM and
across different VMs and (2) keep critical
checkpointing pages in PCM to reduce I/O traffic.
Experimental results show that our single VM design
(IntraVM) improves performance by 10\% and 20\%
compared to pure SLC- and MLC- based systems. Further
incorporating VM-aware resource management schemes
(IntraVM+InterVM) increases system performance by 15\%.
In addition, our design shortens the checkpoint/restore
                 duration by 46\% and reduces the overall I/O penalty
                 to the system by 50\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ouyang:2013:PTS,
author = "Jiannan Ouyang and John R. Lange",
title = "Preemptable ticket spinlocks: improving consolidated
performance in the cloud",
journal = j-SIGPLAN,
volume = "48",
number = "7",
pages = "191--200",
month = jul,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517326.2451549",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:55:17 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "VEE '13 Conference proceedings.",
abstract = "When executing inside a virtual machine environment,
OS level synchronization primitives are faced with
significant challenges due to the scheduling behavior
of the underlying virtual machine monitor. Operations
that are guaranteed to last only a short amount of time
                 on real hardware can take considerably longer when
                 running virtualized. This change in
assumptions has significant impact when an OS is
executing inside a critical region that is protected by
a spinlock. The interaction between OS level spinlocks
and VMM scheduling is known as the Lock Holder
Preemption problem and has a significant impact on
overall VM performance. However, with the use of ticket
locks instead of generic spinlocks, virtual
environments must also contend with waiters being
preempted before they are able to acquire the lock.
This has the effect of blocking access to a lock, even
if the lock itself is available. We identify this
scenario as the Lock Waiter Preemption problem. In
order to solve both problems we introduce Preemptable
Ticket spinlocks, a new locking primitive that is
designed to enable a VM to always make forward progress
by relaxing the ordering guarantees offered by ticket
locks. We show that the use of Preemptable Ticket
spinlocks improves VM performance by 5.32X on average,
when running on a non-paravirtual VMM, and by 7.91X
when running on a VMM that supports a paravirtual
locking interface, when executing a set of
microbenchmarks as well as a realistic e-commerce
benchmark.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
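For readers unfamiliar with the baseline that the abstract above relaxes, a
classic ticket spinlock in C11 atomics is sketched below. Note in particular
why a waiter cannot simply abandon its ticket: the owner counter would
eventually reach the abandoned ticket and hand the lock to nobody, which is
precisely the protocol constraint that Preemptable Ticket spinlocks work
around. This is the standard textbook lock, not the paper's algorithm.

/* Classic ticket spinlock: strict FIFO handoff of the critical section.
 * A preempted waiter therefore blocks everyone behind it (the Lock
 * Waiter Preemption problem the abstract identifies). */
#include <stdatomic.h>

typedef struct {
    atomic_uint next;   /* next ticket to hand out */
    atomic_uint owner;  /* ticket currently allowed to enter */
} ticket_lock_t;

void ticket_lock(ticket_lock_t *l) {
    unsigned me = atomic_fetch_add(&l->next, 1);  /* take a ticket */
    while (atomic_load(&l->owner) != me)
        ;  /* spin until it is my turn (FIFO order) */
}

void ticket_unlock(ticket_lock_t *l) {
    atomic_fetch_add(&l->owner, 1);  /* hand the lock to the next ticket */
}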
@Article{Yang:2013:PSC,
author = "Chao Yang and Wei Xue and Haohuan Fu and Lin Gan and
Linfeng Li and Yangtong Xu and Yutong Lu and Jiachang
Sun and Guangwen Yang and Weimin Zheng",
title = "A peta-scalable {CPU-GPU} algorithm for global
atmospheric simulations",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "1--12",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Developing highly scalable algorithms for global
atmospheric modeling is becoming increasingly important
as scientists seek to understand the behavior of the
global atmosphere at extreme scales. Nowadays,
heterogeneous architectures based on both processors and
                 accelerators are becoming an important solution for
large-scale computing. However, large-scale simulation
of the global atmosphere brings a severe challenge to
the development of highly scalable algorithms that fit
well into state-of-the-art heterogeneous systems.
Although successes have been made on GPU-accelerated
computing in some top-level applications, studies on
fully exploiting heterogeneous architectures in global
atmospheric modeling are still rarely seen,
due in large part to both the computational
difficulties of the mathematical models and the
requirement of high accuracy for long term simulations.
In this paper, we propose a peta-scalable hybrid
algorithm that is successfully applied in a
cubed-sphere shallow-water model in global atmospheric
simulations. We employ an adjustable partition between
CPUs and GPUs to achieve a balanced utilization of the
entire hybrid system, and present a pipe-flow scheme to
conduct conflict-free inter-node communication on the
cubed-sphere geometry and to maximize
communication-computation overlap. Systematic
optimizations for multithreading on both GPU and CPU
sides are performed to enhance computing throughput and
improve memory efficiency. Our experiments demonstrate
nearly ideal strong and weak scalabilities on up to
3,750 nodes of the Tianhe-1A. The largest run sustains
a performance of 0.8 Pflops in double precision (32\%
of the peak performance), using 45,000 CPU cores and
3,750 GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lifflander:2013:APF,
author = "Jonathan Lifflander and Phil Miller and Laxmikant
Kale",
title = "Adoption protocols for fanout-optimal fault-tolerant
termination detection",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "13--22",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442519",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Termination detection is relevant for signaling
completion (all processors are idle and no messages are
in flight) of many operations in distributed systems,
including work stealing algorithms, dynamic data
exchange, and dynamically structured computations. In
the face of growing supercomputers with increasing
likelihood that each job may encounter faults, it is
important for high-performance computing applications
that rely on termination detection that such an
algorithm be able to tolerate the inevitable faults. We
provide a trio of new practical fault tolerance schemes
for a standard approach to termination detection that
are easy to implement, present low overhead in both
theory and practice, and have scalable costs when
recovering from faults. These schemes tolerate all
single-process faults, and are probabilistically
tolerant of faults affecting multiple processes. We
combine the theoretical failure probabilities we can
calculate for each algorithm with historical fault
records from real machines to show that these
algorithms have excellent overall survivability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yuki:2013:ADA,
author = "Tomofumi Yuki and Paul Feautrier and Sanjay Rajopadhye
and Vijay Saraswat",
title = "Array dataflow analysis for polyhedral {X10}
programs",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "23--34",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442520",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "This paper addresses the static analysis of an
important class of X10 programs, namely those with
finish/async parallelism, and affine loops and array
reference structure as in the polyhedral model. For
such programs our analysis can certify whether a
                 program is deterministic, or flag races. Our key
contributions are (i) adaptation of array dataflow
analysis from the polyhedral model to programs with
finish/async parallelism, and (ii) use of the array
dataflow analysis result to certify determinacy. We
distinguish our work from previous approaches by
combining the precise statement instance-wise and array
element-wise analysis capability of the polyhedral
model with finish/async programs that are more
expressive than DOALL parallelism commonly considered
in the polyhedral literature. We show that our approach
is exact (no false negatives or positives) and more precise
than previous approaches, but is limited to programs
that fit the polyhedral model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Prountzos:2013:BCA,
author = "Dimitrios Prountzos and Keshav Pingali",
title = "Betweenness centrality: algorithms and
implementations",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "35--46",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442521",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Betweenness centrality is an important metric in the
study of social networks, and several algorithms for
computing this metric exist in the literature. This
paper makes three contributions. First, we show that
the problem of computing betweenness centrality can be
formulated abstractly in terms of a small set of
operators that update the graph. Second, we show that
existing parallel algorithms for computing betweenness
centrality can be viewed as implementations of
different schedules for these operators, permitting all
these algorithms to be formulated in a single
framework. Third, we derive a new asynchronous parallel
algorithm for betweenness centrality that (i) works
seamlessly for both weighted and unweighted graphs,
(ii) can be applied to large graphs, and (iii) is able
to extract large amounts of parallelism. We implemented
this algorithm and compared it against a number of
publicly available implementations of previous
algorithms on two different multicore architectures.
Our results show that the new algorithm is the best
performing one in most cases, particularly for large
graphs and large thread counts, and is always
competitive against other algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Xiang:2013:CAM,
author = "Lingxiang Xiang and Michael Lee Scott",
title = "Compiler aided manual speculation for high performance
concurrent data structures",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "47--56",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Speculation is a well-known means of increasing
parallelism among concurrent methods that are usually
but not always independent. Traditional nonblocking
data structures employ a particularly restrictive form
of speculation. Software transactional memory (STM)
systems employ a much more general---though typically
blocking---form, and there is a wealth of options in
between. Using several different concurrent data
structures as examples, we show that manual addition of
speculation to traditional lock-based code can lead to
significant performance improvements. Successful
speculation requires careful consideration of
profitability, and of how and when to validate
consistency. Unfortunately, it also requires
substantial modifications to code structure and a deep
understanding of the memory model. These latter
requirements make it difficult to use in its purely
manual form, even for expert programmers. To simplify
the process, we present a compiler tool, CSpec, that
automatically generates speculative code from baseline
lock-based code with user annotations. Compiler-aided
manual speculation keeps the original code structure
for better readability and maintenance, while providing
the flexibility to choose speculation and validation
strategies. Experiments on UltraSPARC and x86 platforms
demonstrate that with a small number of annotations added
to lock-based code, CSpec can generate speculative code
that matches the performance of best-effort
hand-written versions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
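The speculate-then-validate pattern described in the abstract above can be
illustrated with a sequence-counter idiom: read shared data optimistically
without taking the lock, then validate that no writer intervened, retrying
(or falling back to the lock) on failure. The sketch below is hand-written
and hypothetical; it shows the kind of code such a tool might generate from
annotations, not CSpec's actual output. The plain reads in the speculative
section are a simplification; a fully rigorous C11 seqlock needs atomic
accesses there.

/* Hand-written speculative read with validation (seqlock-style sketch). */
#include <stdatomic.h>
#include <pthread.h>

struct counter_pair {
    pthread_mutex_t lock;
    atomic_uint     seq;   /* even: stable; odd: writer in progress */
    int a, b;              /* invariant: updated together under lock */
};

void pair_update(struct counter_pair *p, int a, int b) {
    pthread_mutex_lock(&p->lock);
    atomic_fetch_add_explicit(&p->seq, 1, memory_order_release); /* odd */
    p->a = a;
    p->b = b;
    atomic_fetch_add_explicit(&p->seq, 1, memory_order_release); /* even */
    pthread_mutex_unlock(&p->lock);
}

int pair_sum(struct counter_pair *p) {
    for (;;) {
        unsigned s0 = atomic_load_explicit(&p->seq, memory_order_acquire);
        if (s0 & 1u) continue;              /* writer active: retry */
        int sum = p->a + p->b;              /* speculative, lock-free reads */
        atomic_thread_fence(memory_order_acquire);
        unsigned s1 = atomic_load_explicit(&p->seq, memory_order_relaxed);
        if (s0 == s1) return sum;           /* validation succeeded */
        /* otherwise a writer intervened: retry, or fall back to the lock */
    }
}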
@Article{Wu:2013:CAA,
author = "Bo Wu and Zhijia Zhao and Eddy Zheng Zhang and Yunlian
Jiang and Xipeng Shen",
title = "Complexity analysis and algorithm design for
reorganizing data to minimize non-coalesced memory
accesses on {GPU}",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "57--68",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442523",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "The performance of Graphic Processing Units (GPU) is
sensitive to irregular memory references. Some recent
work shows the promise of data reorganization for
eliminating non-coalesced memory accesses that are
caused by irregular references. However, all previous
studies have employed simple, heuristic methods to
determine the new data layouts to create. As a result,
they either do not provide any performance guarantee or
are effective in only some limited scenarios. This
paper contributes a fundamental study to the problem.
It systematically analyzes the inherent complexity of
the problem in various settings, and for the first
time, proves that the problem is NP-complete. It then
points out the limitations of existing techniques and
reveals that in practice, the essence for designing an
appropriate data reorganization algorithm can be
reduced to a tradeoff among space, time, and
complexity. Based on that insight, it develops two new
data reorganization algorithms to overcome the
limitations of previous methods. Experiments show that
an assembly composed of the new algorithms and a
previous algorithm can circumvent the inherent
complexity in finding optimal data layouts, making it
feasible to minimize non-coalesced memory accesses for
a variety of irregular applications and settings that
are beyond the reach of existing techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Le:2013:CEW,
author = "Nhat Minh L{\^e} and Antoniu Pop and Albert Cohen and
Francesco Zappa Nardelli",
title = "Correct and efficient work-stealing for weak memory
models",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "69--80",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442524",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Chase and Lev's concurrent deque is a key data
structure in shared-memory parallel programming and
plays an essential role in work-stealing schedulers. We
provide the first correctness proof of an optimized
implementation of Chase and Lev's deque on top of the
POWER and ARM architectures: these provide very relaxed
memory models, which we exploit to improve performance
but considerably complicate the reasoning. We also
study an optimized x86 and a portable C11
implementation, conducting systematic experiments to
evaluate the impact of memory barrier optimizations.
Our results demonstrate the benefits of hand tuning the
deque code when running on top of relaxed memory
models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
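For reference, the core of Chase and Lev's deque, the subject of the abstract
above, fits in a few dozen lines. The sketch below is a simplified
fixed-capacity variant with conservative (sequentially consistent) atomics
throughout and no overflow handling; the paper's contribution is proving
which of these orderings can safely be relaxed on POWER and ARM.

/* Simplified fixed-capacity Chase-Lev work-stealing deque in C11.
 * The owner calls ws_push/ws_take; any thread may call ws_steal.
 * No growing and no overflow check: the owner must keep fewer than
 * DEQUE_CAP pending tasks. */
#include <stdatomic.h>
#include <stdint.h>

#define DEQUE_CAP 4096
#define WS_EMPTY  ((intptr_t)0)
#define WS_ABORT  ((intptr_t)-1)

typedef struct {
    atomic_long top;                  /* stealers take from here */
    atomic_long bottom;               /* owner pushes/takes here */
    atomic_intptr_t buf[DEQUE_CAP];
} ws_deque_t;

void ws_push(ws_deque_t *q, intptr_t task) {
    long b = atomic_load(&q->bottom);
    atomic_store(&q->buf[b % DEQUE_CAP], task);
    atomic_store(&q->bottom, b + 1);
}

intptr_t ws_take(ws_deque_t *q) {
    long b = atomic_load(&q->bottom) - 1;
    atomic_store(&q->bottom, b);
    long t = atomic_load(&q->top);
    if (t > b) {                      /* deque was empty */
        atomic_store(&q->bottom, b + 1);
        return WS_EMPTY;
    }
    intptr_t task = atomic_load(&q->buf[b % DEQUE_CAP]);
    if (t == b) {                     /* last element: race with stealers */
        if (!atomic_compare_exchange_strong(&q->top, &t, t + 1))
            task = WS_EMPTY;          /* a stealer won the race */
        atomic_store(&q->bottom, b + 1);
    }
    return task;
}

intptr_t ws_steal(ws_deque_t *q) {
    long t = atomic_load(&q->top);
    long b = atomic_load(&q->bottom);
    if (t >= b) return WS_EMPTY;
    intptr_t task = atomic_load(&q->buf[t % DEQUE_CAP]);
    if (!atomic_compare_exchange_strong(&q->top, &t, t + 1))
        return WS_ABORT;              /* lost a race: caller may retry */
    return task;
}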
@Article{Bergstrom:2013:DOF,
author = "Lars Bergstrom and Matthew Fluet and Mike Rainey and
John Reppy and Stephen Rosen and Adam Shaw",
title = "Data-only flattening for nested data parallelism",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "81--92",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442525",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Data parallelism has proven to be an effective
technique for high-level programming of a certain class
of parallel applications, but it is not well suited to
irregular parallel computations. Blelloch and others
proposed nested data parallelism (NDP) as a language
mechanism for programming irregular parallel
applications in a declarative data-parallel style. The
key to this approach is a compiler transformation that
flattens the NDP computation and data structures into a
form that can be executed efficiently on a wide-vector
SIMD architecture. Unfortunately, this technique is ill
suited to execution on today's multicore machines. We
present a new technique, called data-only flattening,
for the compilation of NDP, which is suitable for
multicore architectures. Data-only flattening
transforms nested data structures in order to expose
programs to various optimizations while leaving control
structures intact. We present a formal semantics of
data-only flattening in a core language with a
rewriting system. We demonstrate the effectiveness of
this technique in the Parallel ML implementation and we
report encouraging experimental results across various
benchmark applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Morozov:2013:DMT,
author = "Dmitriy Morozov and Gunther Weber",
title = "Distributed merge trees",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "93--102",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442526",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Improved simulations and sensors are producing
datasets whose increasing complexity exhausts our
ability to visualize and comprehend them directly. To
cope with this problem, we can detect and extract
significant features in the data and use them as the
basis for subsequent analysis. Topological methods are
valuable in this context because they provide robust
and general feature definitions. As the growth of
serial computational power has stalled, data analysis
is becoming increasingly dependent on massively
parallel machines. To satisfy the computational demand
created by complex datasets, algorithms need to
effectively utilize these computer architectures. The
main strength of topological methods, their emphasis on
global information, turns into an obstacle during
parallelization. We present two approaches to alleviate
this problem. We develop a distributed representation
of the merge tree that avoids computing the global tree
on a single processor and lets us parallelize
subsequent queries. To account for the increasing
number of cores per processor, we develop a new data
structure that lets us take advantage of multiple
shared-memory cores to parallelize the work on a single
node. Finally, we present experiments that illustrate
the strengths of our approach as well as help identify
future challenges.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Morrison:2013:FCQ,
author = "Adam Morrison and Yehuda Afek",
title = "Fast concurrent queues for x86 processors",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "103--112",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442527",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Conventional wisdom in designing concurrent data
structures is to use the most powerful synchronization
primitive, namely compare-and-swap (CAS), and to avoid
contended hot spots. In building concurrent FIFO
queues, this reasoning has led researchers to propose
combining-based concurrent queues. This paper takes a
different approach, showing how to rely on
fetch-and-add (F\&A), a less powerful primitive that is
available on x86 processors, to construct a nonblocking
(lock-free) linearizable concurrent FIFO queue which,
despite the F\&A being a contended hot spot,
outperforms combining-based implementations by 1.5x to
2.5x at all concurrency levels on an x86 server with
four multicore processors, in both single-processor and
multi-processor executions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
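The contrast with CAS that the abstract above draws can be made concrete:
enqueuers and dequeuers claim array cells by performing a single
fetch-and-add on a shared index, so the contended hot spot never forces the
fail-and-retry loop that a contended CAS does. The sketch below is a
simplified blocking, bounded variant of that idea (the caller must keep at
most QCAP items in flight and values must be nonzero); the paper's LCRQ
achieves the same cell-claiming scheme while remaining linearizable and
nonblocking, which takes considerably more machinery.

/* Bounded FIFO queue whose cells are claimed with one fetch-and-add on
 * head/tail counters. Blocking sketch of the F&A idea, not LCRQ. */
#include <stdatomic.h>
#include <stdint.h>

#define QCAP 1024

typedef struct {
    atomic_uint_fast64_t tail;        /* total enqueues started */
    atomic_uint_fast64_t head;        /* total dequeues started */
    atomic_intptr_t cell[QCAP];       /* zero-initialized; 0 means empty */
} faa_queue_t;

void faa_enqueue(faa_queue_t *q, intptr_t v) {        /* v must be nonzero */
    uint_fast64_t t = atomic_fetch_add(&q->tail, 1);  /* claim a cell */
    atomic_intptr_t *c = &q->cell[t % QCAP];
    while (atomic_load(c) != 0)
        ;   /* an older round's value is still here; its consumer clears it */
    atomic_store(c, v);
}

intptr_t faa_dequeue(faa_queue_t *q) {
    uint_fast64_t h = atomic_fetch_add(&q->head, 1);  /* claim a cell */
    atomic_intptr_t *c = &q->cell[h % QCAP];
    intptr_t v;
    while ((v = atomic_exchange(c, 0)) == 0)
        ;   /* wait for the matching producer */
    return v;
}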
@Article{Wamhoff:2013:FIP,
author = "Jons-Tobias Wamhoff and Christof Fetzer and Pascal
Felber and Etienne Rivi{\`e}re and Gilles Muller",
title = "{FastLane}: improving performance of software
transactional memory for low thread counts",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "113--122",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442528",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Software transactional memory (STM) can lead to
scalable implementations of concurrent programs, as the
relative performance of an application increases with
the number of threads that support it. However, the
absolute performance is typically impaired by the
overheads of transaction management and instrumented
accesses to shared memory. This often leads STM-based
programs with low thread counts to perform worse than a
sequential, non-instrumented version of the same
application. In this paper, we propose FastLane, a new
STM algorithm that bridges the performance gap between
sequential execution and classical STM algorithms when
running on few cores. FastLane seeks to reduce
instrumentation costs and thus performance degradation
in its target operation range. We introduce a novel
algorithm that differentiates between two types of
threads: One thread (the master) executes transactions
pessimistically without ever aborting, thus with
minimal instrumentation and management costs, while
other threads (the helpers) can commit speculative
transactions only when they do not conflict with the
master. Helpers thus contribute to the application
progress without impairing the performance of the
master. We implement FastLane as an extension of a
state-of-the-art STM runtime system and compiler.
Multiple code paths are produced for execution on a
single, few, and many cores. The runtime system selects
the code path providing the best throughput, depending
on the number of cores available on the target machine.
Evaluation results indicate that our approach provides
promising performance at low thread counts: FastLane
almost systematically wins over a classical STM in the
1-6 threads range, and often performs better than
sequential execution of the non-instrumented version of
the same application starting with 2 threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Barthe:2013:RVS,
author = "Gilles Barthe and Juan Manuel Crespo and Sumit Gulwani
and Cesar Kunz and Mark Marron",
title = "From relational verification to {SIMD} loop
synthesis",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "123--134",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442529",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Existing pattern-based compiler technology is unable
to effectively exploit the full potential of SIMD
architectures. We present a new program synthesis based
technique for auto-vectorizing performance critical
innermost loops. Our synthesis technique is applicable
to a wide range of loops, consistently produces
performant SIMD code, and generates correctness proofs
for the output code. The synthesis technique, which
leverages existing work on relational verification
methods, is a novel combination of deductive loop
restructuring, synthesis condition generation and a new
inductive synthesis algorithm for producing loop-free
code fragments. The inductive synthesis algorithm wraps
an optimized depth-first exploration of code sequences
inside a CEGIS loop. Our technique is able to quickly
produce SIMD implementations (up to 9 instructions in
0.12 seconds) for a wide range of fundamental looping
structures. The resulting SIMD implementations
outperform the original loops by 2.0x-3.7x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shun:2013:LLG,
author = "Julian Shun and Guy E. Blelloch",
title = "{Ligra}: a lightweight graph processing framework for
shared memory",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "135--146",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442530",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "There has been significant recent interest in parallel
frameworks for processing graphs due to their
applicability in studying social networks, the Web
graph, networks in biology, and unstructured meshes in
scientific simulation. Due to the desire to process
large graphs, these systems have emphasized the ability
to run on distributed memory machines. Today, however,
a single multicore server can support more than a
terabyte of memory, which can fit graphs with tens or
even hundreds of billions of edges. Furthermore, for
graph algorithms, shared-memory multicores are
generally significantly more efficient on a per core,
per dollar, and per joule basis than distributed memory
systems, and shared-memory algorithms tend to be
simpler than their distributed counterparts. In this
paper, we present a lightweight graph processing
framework that is specific to shared-memory
parallel/multicore machines, which makes graph
traversal algorithms easy to write. The framework has
two very simple routines, one for mapping over edges
and one for mapping over vertices. Our routines can be
applied to any subset of the vertices, which makes the
framework useful for many graph traversal algorithms
that operate on subsets of the vertices. Based on
recent ideas used in a very fast algorithm for
breadth-first search (BFS), our routines automatically
adapt to the density of vertex sets. We implement
several algorithms in this framework, including BFS,
graph radii estimation, graph connectivity, betweenness
centrality, PageRank and single-source shortest paths.
Our algorithms expressed using this framework are very
simple and concise, and perform almost as well as
highly optimized code. Furthermore, they get good
speedups on a 40-core machine and are significantly
more efficient than previously reported results using
graph frameworks on machines with many more cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
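The two-routine interface in the abstract above is small enough to sketch.
Below, an edgeMap applies an update function to every edge leaving the
current frontier and returns the next frontier, and BFS becomes a short loop
on top of it. This is a sequential, sparse-frontier-only skeleton for a CSR
graph, written to illustrate the interface; Ligra itself runs these loops in
parallel and switches automatically between sparse and dense frontier
representations.

/* Minimal edgeMap/vertexSubset sketch in the spirit of Ligra's interface. */
#include <stdlib.h>
#include <string.h>

typedef struct { int n; int *offsets; int *edges; } graph_t;   /* CSR */
typedef struct { int len; int *vs; } vertex_subset_t;          /* sparse */

/* Apply update to every edge (u,v) with u in the frontier; v joins the
 * next frontier when cond(v) holds and update(u,v) returns nonzero. */
vertex_subset_t edge_map(const graph_t *g, vertex_subset_t frontier,
                         int (*update)(int u, int v, void *env),
                         int (*cond)(int v, void *env), void *env) {
    vertex_subset_t out = { 0, malloc(sizeof(int) * g->n) };
    for (int i = 0; i < frontier.len; i++) {
        int u = frontier.vs[i];
        for (int e = g->offsets[u]; e < g->offsets[u + 1]; e++) {
            int v = g->edges[e];
            if (cond(v, env) && update(u, v, env))
                out.vs[out.len++] = v;
        }
    }
    return out;
}

/* BFS on top of edge_map: parent[] doubles as the visited set. */
static int bfs_cond(int v, void *env) { return ((int *)env)[v] == -1; }
static int bfs_update(int u, int v, void *env) {
    int *parent = env;
    if (parent[v] != -1) return 0;     /* already claimed */
    parent[v] = u;
    return 1;
}

void bfs(const graph_t *g, int root, int *parent) {
    memset(parent, -1, sizeof(int) * g->n);
    parent[root] = root;
    vertex_subset_t f = { 1, malloc(sizeof(int)) };
    f.vs[0] = root;
    while (f.len > 0) {
        vertex_subset_t nf = edge_map(g, f, bfs_update, bfs_cond, parent);
        free(f.vs);
        f = nf;
    }
    free(f.vs);
}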
@Article{Nasre:2013:MAG,
author = "Rupesh Nasre and Martin Burtscher and Keshav Pingali",
title = "Morph algorithms on {GPUs}",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "147--156",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "There is growing interest in using GPUs to accelerate
graph algorithms such as breadth-first search,
computing page-ranks, and finding shortest paths.
However, these algorithms do not modify the graph
structure, so their implementation is relatively easy
compared to general graph algorithms like mesh
generation and refinement, which morph the underlying
graph in non-trivial ways by adding and removing nodes
and edges. We know relatively little about how to
implement morph algorithms efficiently on GPUs. In this
paper, we present and study four morph algorithms: (i)
a computational geometry algorithm called Delaunay Mesh
Refinement (DMR), (ii) an approximate SAT solver called
Survey Propagation (SP), (iii) a compiler analysis
called Points-To Analysis (PTA), and (iv) Boruvka's
Minimum Spanning Tree algorithm (MST). Each of these
algorithms modifies the graph data structure in
different ways and thus poses interesting challenges.
We overcome these challenges using algorithmic and
GPU-specific optimizations. We propose efficient
techniques to perform concurrent subgraph addition,
subgraph deletion, conflict detection and several
optimizations to improve the scalability of morph
algorithms. For an input mesh with 10 million
triangles, our DMR code achieves an 80x speedup over
the highly optimized serial Triangle program and a 2.3x
speedup over a multicore implementation running with 48
threads. Our SP code is 3x faster than a multicore
implementation with 48 threads on an input with 1
million literals. The PTA implementation is able to
analyze six SPEC 2000 benchmark programs in just 74
milliseconds, achieving a geometric mean speedup of
9.3x over a 48-thread multicore version. Our MST code
is slower than a multicore version with 48 threads for
sparse graphs but significantly faster for denser
graphs. This work provides several insights into how
other morph algorithms can be efficiently implemented
on GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Calciu:2013:NAR,
author = "Irina Calciu and Dave Dice and Yossi Lev and Victor
Luchangco and Virendra J. Marathe and Nir Shavit",
title = "{NUMA}-aware reader-writer locks",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "157--166",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442532",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Non-Uniform Memory Access (NUMA) architectures are
gaining importance in mainstream computing systems due
to the rapid growth of multi-core multi-chip machines.
Extracting the best possible performance from these new
machines will require us to revisit the design of the
concurrent algorithms and synchronization primitives
which form the building blocks of many of today's
applications. This paper revisits one such critical
synchronization primitive --- the reader-writer lock.
We present what is, to the best of our knowledge, the
first family of reader-writer lock algorithms tailored
to NUMA architectures. We present several variations
which trade fairness between readers and writers for
higher concurrency among readers and better
back-to-back batching of writers from the same NUMA
node. Our algorithms leverage the lock cohorting
technique to manage synchronization between writers in
a NUMA-friendly fashion, binary flags to coordinate
readers and writers, and simple distributed reader
counter implementations to enable NUMA-friendly
concurrency among readers. The end result is a
collection of surprisingly simple NUMA-aware algorithms
that outperform the state-of-the-art reader-writer
locks by up to a factor of 10 in our microbenchmark
experiments. To evaluate our algorithms in a realistic
setting we also present performance results of the {\tt
kccachetest} benchmark of the Kyoto-Cabinet
distribution, an open-source database which makes heavy
use of pthread reader-writer locks. Our locks boost the
performance of {\tt kccachetest} by up to 40\% over the
best prior alternatives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
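One ingredient named in the abstract above, the distributed reader counter,
is easy to sketch: readers increment a per-node, cache-line-padded counter
instead of one global counter, and a writer raises a flag and waits for every
counter to drain. The C11 sketch below shows only that ingredient;
writer-vs-writer exclusion is a plain mutex here, whereas the paper uses lock
cohorting and adds several fairness policies.

/* Reader/writer lock with distributed (per-NUMA-node) reader counters. */
#include <stdatomic.h>
#include <pthread.h>

#define NNODES 4   /* assumed NUMA node count for this sketch */

struct padded_ctr { atomic_int readers; char pad[64 - sizeof(atomic_int)]; };

typedef struct {
    struct padded_ctr node[NNODES];   /* readers touch only their node */
    atomic_int writer_present;
    pthread_mutex_t writer_mutex;     /* serializes writers */
} drw_lock_t;

void read_lock(drw_lock_t *l, int node_id) {
    for (;;) {
        atomic_fetch_add(&l->node[node_id].readers, 1);
        if (!atomic_load(&l->writer_present))
            return;                                   /* fast path */
        atomic_fetch_sub(&l->node[node_id].readers, 1);
        while (atomic_load(&l->writer_present))
            ;                                         /* wait out the writer */
    }
}

void read_unlock(drw_lock_t *l, int node_id) {
    atomic_fetch_sub(&l->node[node_id].readers, 1);
}

void write_lock(drw_lock_t *l) {
    pthread_mutex_lock(&l->writer_mutex);
    atomic_store(&l->writer_present, 1);              /* block new readers */
    for (int i = 0; i < NNODES; i++)
        while (atomic_load(&l->node[i].readers) != 0)
            ;                                         /* drain active readers */
}

void write_unlock(drw_lock_t *l) {
    atomic_store(&l->writer_present, 0);
    pthread_mutex_unlock(&l->writer_mutex);
}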
@Article{Chen:2013:OAO,
author = "Zizhong Chen",
title = "{Online-ABFT}: an online algorithm based fault
tolerance scheme for soft error detection in iterative
methods",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "167--176",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442533",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Soft errors are one-time events that corrupt the state
of a computing system but not its overall
functionality. Large supercomputers are especially
susceptible to soft errors because of their large
number of components. Soft errors can generally be
detected offline through the comparison of the final
computation results of two duplicated computations, but
this approach often introduces significant overhead.
This paper presents Online-ABFT, a simple but efficient
online soft error detection technique that can detect
soft errors in the widely used Krylov subspace
iterative methods in the middle of the program
execution so that the computation efficiency can be
improved through the termination of the corrupted
computation in a timely manner soon after a soft error
occurs. Based on a simple verification of orthogonality
and residual, Online-ABFT is easy to implement and
highly efficient. Experimental results demonstrate
that, when this online error detection approach is used
together with checkpointing, it improves the time to
obtain correct results by up to several orders of
magnitude over the traditional offline approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
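The ``simple verification of residual'' mentioned in the abstract above can
be pictured as follows: a Krylov solver maintains its residual through a
recurrence, so every d iterations one can recompute the true residual b - Ax
with one extra matrix-vector product and compare the two; a soft error makes
them diverge. The sketch below shows such a periodic check against a dense
stand-in operator; it illustrates the principle only, whereas the paper also
verifies orthogonality and integrates the check with checkpointing.

/* Periodic true-residual check for an iterative solver, Online-ABFT style. */
#include <math.h>

/* y = b - A*x for a dense n x n matrix A (stand-in for the real operator). */
static void true_residual(int n, const double *A, const double *x,
                          const double *b, double *y) {
    for (int i = 0; i < n; i++) {
        double s = b[i];
        for (int j = 0; j < n; j++) s -= A[i * n + j] * x[j];
        y[i] = s;
    }
}

/* Returns 1 when the recurrence-maintained residual r drifts from b - A*x
 * by more than tol, signaling a likely soft error. Call every d
 * iterations; the cost is one extra matrix-vector product. */
int residual_check(int n, const double *A, const double *x,
                   const double *b, const double *r,
                   double *scratch, double tol) {
    true_residual(n, A, x, b, scratch);
    double diff = 0.0, norm = 0.0;
    for (int i = 0; i < n; i++) {
        double d = scratch[i] - r[i];
        diff += d * d;
        norm += scratch[i] * scratch[i];
    }
    double tn = sqrt(norm);
    return sqrt(diff) > tol * (tn > 1.0 ? tn : 1.0);
}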
@Article{Friedley:2013:OPE,
author = "Andrew Friedley and Torsten Hoefler and Greg
Bronevetsky and Andrew Lumsdaine and Ching-Chen Ma",
title = "Ownership passing: efficient distributed memory
programming on multi-core systems",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "177--186",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "The number of cores in multi- and many-core
high-performance processors is steadily increasing.
MPI, the de-facto standard for programming
high-performance computing systems, offers a distributed
memory programming model. MPI's semantics force a copy
from one process' send buffer to another process'
receive buffer. This makes it difficult to achieve the
same performance on modern hardware as shared memory
                 programs, which are arguably harder to maintain and
                 debug. We propose generalizing MPI's communication
                 model to include ownership passing, which makes it
possible to fully leverage the shared memory hardware
of multi- and many-core CPUs to stream communicated
data concurrently with the receiver's computations on
it. The benefits and simplicity of message passing are
retained by extending MPI with calls to send (pass)
ownership of memory regions, instead of their contents,
between processes. Ownership passing is achieved with a
hybrid MPI implementation that runs MPI processes as
threads and is mostly transparent to the user. We
propose an API and a static analysis technique to
transform legacy MPI codes automatically and
transparently to the programmer, demonstrating that
this scheme is easy to use in practice. Using the
ownership passing technique, we see up to 51\%
communication speedups over a standard message passing
implementation on state-of-the art multicore systems.
Our analysis and interface will lay the groundwork for
future development of MPI-aware optimizing compilers
and multi-core specific optimizations, which will be
key for success in current and next-generation
computing platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
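Within a single address space, passing ownership of a buffer instead of its
contents amounts to handing over a pointer together with the convention that
the sender never touches the buffer again. The sketch below shows that
convention with a one-slot mailbox between two C11 threads (assuming a
platform with <threads.h>); it illustrates the idea only, since the paper
wraps the convention in MPI-compatible calls inside a thread-based MPI
runtime.

/* Ownership passing: the producer hands over the buffer pointer, not a
 * copy, and relinquishes access; the consumer eventually frees it. */
#include <stdatomic.h>
#include <stdlib.h>
#include <stdio.h>
#include <threads.h>

static atomic_uintptr_t mailbox;     /* 0 = empty slot */

int producer(void *arg) {
    double *buf = malloc(1024 * sizeof(double));   /* build the "message" */
    for (int i = 0; i < 1024; i++) buf[i] = i;
    uintptr_t expected = 0;
    while (!atomic_compare_exchange_weak(&mailbox, &expected, (uintptr_t)buf))
        expected = 0;                /* wait until the slot is empty */
    /* Ownership passed: the producer must not touch buf after this point. */
    return 0;
}

int consumer(void *arg) {
    uintptr_t p;
    while ((p = atomic_exchange(&mailbox, 0)) == 0)
        ;                            /* wait for a message */
    double *buf = (double *)p;       /* we now own the buffer */
    printf("received, buf[42] = %g\n", buf[42]);
    free(buf);
    return 0;
}

int main(void) {
    thrd_t p, c;
    thrd_create(&p, producer, NULL);
    thrd_create(&c, consumer, NULL);
    thrd_join(p, NULL);
    thrd_join(c, NULL);
}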
@Article{Meyerovich:2013:PSS,
author = "Leo A. Meyerovich and Matthew E. Torok and Eric
Atkinson and Rastislav Bodik",
title = "Parallel schedule synthesis for attribute grammars",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "187--196",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442535",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "We examine how to synthesize a parallel schedule of
structured traversals over trees. In our system,
programs are declaratively specified as attribute
grammars. Our synthesizer automatically, correctly, and
quickly schedules the attribute grammar as a
composition of parallel tree traversals. Our downstream
compiler optimizes for GPUs and multicore CPUs. We
provide support for designing efficient schedules.
First, we introduce a declarative language of schedules
where programmers may constrain any part of the
schedule and the synthesizer will complete and autotune
the rest. Furthermore, the synthesizer answers
debugging queries about how schedules may be completed.
We evaluate our approach with two case studies. First,
we created the first parallel schedule for a large
fragment of CSS and report a 3X multicore speedup.
Second, we created an interactive GPU-accelerated
animation of over 100,000 nodes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Deo:2013:PSA,
author = "Mrinal Deo and Sean Keely",
title = "Parallel suffix array and least common prefix for the
{GPU}",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "197--206",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442536",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Suffix Array (SA) is a data structure formed by
sorting the suffixes of a string into lexicographic
order. SAs have been used in a variety of applications,
most notably in pattern matching and Burrows--Wheeler
Transform (BWT) based lossless data compression. SAs
have also become the data structure of choice for many,
if not all, string processing problems to which suffix
tree methodology is applicable. Over the last two
                 decades researchers have proposed many suffix array
                 construction algorithms (SACAs). We conduct a systematic
                 study of the main classes of SACAs with the intent of
                 mapping them onto a data parallel architecture like the
                 GPU. We conclude that the skew algorithm [12], a linear
                 time recursive algorithm, is the best candidate for
                 GPUs, as all its phases can be efficiently mapped to
                 data parallel hardware. Our OpenCL implementation of
                 the skew algorithm achieves a throughput of up to 25
MStrings/sec and a speedup of up to 34x and 5.8x over a
single threaded CPU implementation using a discrete GPU
and APU respectively. We also compare our OpenCL
implementation against the fastest known CPU
implementation based on induced copying and achieve a
speedup of up to 3.7x. Using SA we construct BWT on GPU
and achieve a speedup of 11x over the fastest known BWT
on GPU. Suffix arrays are often augmented with the
longest common prefix (LCP) information. We design a
novel high-performance parallel algorithm for computing
LCP on the GPU. Our GPU implementation of LCP achieves
a speedup of up to 25x and 4.3x on discrete GPU and APU
respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
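For readers unfamiliar with the two structures in this abstract, the
following sequential Python sketch builds a suffix array naively (by
sorting suffixes, not the paper's skew/GPU algorithm) and computes the
LCP array with Kasai's classic linear-time method:

    def suffix_array(s):
        # naive construction: sort suffix start positions lexicographically
        # (O(n^2 log n); fine for illustration only)
        return sorted(range(len(s)), key=lambda i: s[i:])

    def lcp_array(s, sa):
        # Kasai's algorithm: lcp[i] = common prefix of suffixes sa[i-1], sa[i]
        n = len(s)
        rank = [0] * n
        for i, suf in enumerate(sa):
            rank[suf] = i
        lcp = [0] * n
        h = 0
        for i in range(n):
            if rank[i] > 0:
                j = sa[rank[i] - 1]
                while i + h < n and j + h < n and s[i + h] == s[j + h]:
                    h += 1
                lcp[rank[i]] = h
                if h:
                    h -= 1
        return lcp

    s = "banana"
    sa = suffix_array(s)          # [5, 3, 1, 0, 4, 2]
    print(sa, lcp_array(s, sa))   # lcp: [0, 1, 3, 0, 0, 2]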
@Article{Chen:2013:SDR,
author = "Yufei Chen and Haibo Chen",
title = "Scalable deterministic replay in a parallel
full-system emulator",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "207--218",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442537",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Full-system emulation has been an extremely useful
tool in developing and debugging systems software like
operating systems and hypervisors. However, current
full-system emulators lack the support for
                 deterministic replay, which limits the reproducibility
                 of concurrency bugs, a capability indispensable for
                 analyzing and debugging such inherently multi-threaded
                 systems software. This paper analyzes the challenges in
                 supporting deterministic replay in parallel full-system
                 emulators and makes a comprehensive study of the
                 sources of non-determinism. Unlike application-level
replay systems, our system, called ReEmu, needs to log
sources of non-determinism in both the guest software
stack and the dynamic binary translator for faithful
replay. To provide scalable and efficient record and
replay on multicore machines, ReEmu makes several
notable refinements to the CREW protocol that replays
shared memory systems. First, being aware of the
performance bottlenecks in frequent lock operations in
the CREW protocol, ReEmu refines the CREW protocol with
a seqlock-like design, to avoid serious contention and
possible starvation in instrumentation code tracking
dependence of racy accesses on a shared memory object.
Second, to minimize the required log files, ReEmu only
logs minimal local information regarding accesses to a
shared memory location, but instead relies on an
offline log processing tool to derive precise shared
memory dependence for faithful replay. Third, ReEmu
                 adopts an automatic lock clustering mechanism that
                 clusters a set of uncontended memory objects into a
                 bulk to reduce the frequency of lock operations, which
                 noticeably boosts performance. Our prototype ReEmu is
based on our open-source COREMU system and supports
scalable and efficient record and replay of full-system
environments (both x64 and ARM). Performance evaluation
shows that ReEmu has very good performance scalability
on an Intel multicore machine. It incurs only 68.9\%
performance overhead on average (ranging from 51.8\% to
94.7\%) over vanilla COREMU to record five PARSEC
benchmarks running on a 16-core emulated system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
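The seqlock-like refinement mentioned above lets readers proceed
optimistically and retry only when a writer intervened. A minimal
Python sketch of the general seqlock idea (illustrative, not ReEmu's
actual instrumentation):

    import threading

    class SeqLock:
        # writers bump the counter to odd on entry and even on exit;
        # readers retry whenever the counter was odd or changed underneath
        def __init__(self):
            self.seq = 0
            self._wlock = threading.Lock()

        def write(self, update):
            with self._wlock:
                self.seq += 1      # odd: write in progress
                update()
                self.seq += 1      # even: write complete

        def read(self, snapshot):
            while True:
                s1 = self.seq
                if s1 % 2:         # a write is in progress; retry
                    continue
                value = snapshot()
                if self.seq == s1: # no writer intervened: snapshot is valid
                    return value

    data = {"x": 0, "y": 0}
    lock = SeqLock()
    lock.write(lambda: data.update(x=1, y=1))
    print(lock.read(lambda: (data["x"], data["y"])))  # (1, 1)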
@Article{Acar:2013:SPP,
author = "Umut A. Acar and Arthur Chargueraud and Mike Rainey",
title = "Scheduling parallel programs by work stealing with
private deques",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "219--228",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442538",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Work stealing has proven to be an effective method for
scheduling parallel programs on multicore computers. To
achieve high performance, work stealing distributes
tasks between concurrent queues, called deques, which
are assigned to each processor. Each processor operates
on its deque locally except when performing load
balancing via steals. Unfortunately, concurrent deques
suffer from two limitations: (1) local deque operations
require expensive memory fences in modern weak-memory
                 architectures, and (2) they can be very difficult to
                 extend to support various optimizations and flexible
                 forms of task distribution strategies needed by many
                 applications, e.g., those that do not fit nicely into
                 the divide-and-conquer, nested data parallel paradigm.
                 For these reasons, there has been a lot of recent interest in
implementations of work stealing with non-concurrent
deques, where deques remain entirely private to each
processor and load balancing is performed via message
passing. Private deques eliminate the need for memory
fences from local operations and enable the design and
implementation of efficient techniques for reducing
task-creation overheads and improving task
distribution. These advantages, however, come at the
                 cost of communication. It is not known whether work
                 stealing with private deques enjoys the theoretical
                 guarantees of concurrent deques and whether it can be
                 effective in practice. In this paper, we propose two
work-stealing algorithms with private deques and prove
that the algorithms guarantee similar theoretical
bounds as work stealing with concurrent deques. For the
analysis, we use a probabilistic model and consider a
new parameter, the branching depth of the computation.
We present an implementation of the algorithm as a C++
library and show that it compares well to Cilk on a
range of benchmarks. Since our approach relies on
private deques, it enables implementing flexible task
creation and distribution strategies. As a specific
example, we show how to implement task coalescing and
steal-half strategies, which can be important in
fine-grain, non-divide-and-conquer algorithms such as
graph algorithms, and apply them to the
depth-first-search problem.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
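With private deques, a thief cannot pop from a victim's deque; it sends
a steal request that the victim answers between its own task
executions. A single-process Python simulation of that message-passing
round (the two-worker setup and task names are invented for
illustration):

    from collections import deque

    class Worker:
        def __init__(self, wid):
            self.wid = wid
            self.tasks = deque()   # private deque: only the owner touches it
            self.requests = []     # steal requests arriving as messages

    def run_round(workers):
        for w in workers:
            # 1. answer pending steal requests between task executions
            while w.requests:
                thief = w.requests.pop()
                if len(w.tasks) > 1:
                    stolen = w.tasks.popleft()    # oldest task migrates
                    thief.tasks.append(stolen)
                    print("worker", thief.wid, "stole", stolen)
            # 2. run own work, newest first (no fences: nothing is shared)
            if w.tasks:
                print("worker", w.wid, "ran", w.tasks.pop())
            else:
                victim = workers[(w.wid + 1) % len(workers)]
                victim.requests.append(w)         # ask the victim by message

    a, b = Worker(0), Worker(1)
    a.tasks.extend(["t1", "t2", "t3"])
    for _ in range(4):
        run_round([a, b])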
@Article{Yan:2013:SFS,
author = "Shengen Yan and Guoping Long and Yunquan Zhang",
title = "{StreamScan}: fast scan algorithms for {GPUs} without
global barrier synchronization",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "229--238",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Scan (also known as prefix sum) is a very useful
primitive for various important parallel algorithms,
                 such as sort, BFS, SpMV, compaction, and so on. The
                 current state-of-the-art GPU-based scan implementations
                 consist of three consecutive Reduce-Scan-Scan phases.
This approach requires at least two global barriers and
3N (N is the problem size) global memory accesses. In
this paper we propose StreamScan, a novel approach to
implement scan on GPUs with only one computation phase.
                 The main idea is to restrict synchronization to only
                 adjacent workgroups, thereby eliminating global
                 barrier synchronization completely. The new approach
requires only 2N global memory accesses and just one
kernel invocation. On top of this we propose two
important optimizations to further boost performance
                 speedups, namely thread grouping to eliminate
                 unnecessary local barriers, and register optimization
                 to expand the on-chip problem size. We designed an
auto-tuning framework to search the parameter space
automatically to generate highly optimized codes for
both AMD and Nvidia GPUs. We implemented our technique
with OpenCL. Compared with previous fast scan
implementations, experimental results not only show
                 promising performance speedups, but also reveal
                 dramatically different optimization tradeoffs between
                 Nvidia and AMD GPU platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
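StreamScan's single-pass structure can be mimicked sequentially: each
block scans locally and needs only the running total of its immediate
predecessor, so no global barrier separates phases. A sketch of that
chained-block scan (block size and input are illustrative):

    def chained_block_scan(xs, block=4):
        # one pass over the data: block i waits only on block i-1's total,
        # mirroring StreamScan's adjacent-workgroup synchronization
        out, carry = [], 0
        for start in range(0, len(xs), block):
            running = carry
            for x in xs[start:start + block]:
                running += x
                out.append(running)       # inclusive prefix sum
            carry = running               # handed to the next block only
        return out

    print(chained_block_scan([1, 2, 3, 4, 5, 6, 7, 8]))
    # [1, 3, 6, 10, 15, 21, 28, 36]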
@Article{Heumann:2013:TEM,
author = "Stephen T. Heumann and Vikram S. Adve and Shengjie
Wang",
title = "The tasks with effects model for safe concurrency",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "239--250",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442540",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Today's widely-used concurrent programming models
either provide weak safety guarantees, making it easy
to write code with subtle errors, or are limited in the
class of programs that they can express. We propose a
new concurrent programming model based on tasks with
effects that offers strong safety guarantees while
still providing the flexibility needed to support the
many ways that concurrency is used in complex
applications. The core unit of work in our model is a
dynamically-created task. The model's key feature is
that each task has programmer-specified effects, and a
run-time scheduler is used to ensure that two tasks are
run concurrently only if they have non-interfering
effects. Through the combination of statically
verifying the declared effects of tasks and using an
effect-aware run-time scheduler, our model is able to
guarantee strong safety properties, including data race
freedom and atomicity. It is also possible to use our
model to write programs and computations that can be
statically proven to behave deterministically. We
describe the tasks with effects programming model and
provide a formal dynamic semantics for it. We also
describe our implementation of this model in an
extended version of Java and evaluate its use in
several programs exhibiting various patterns of
concurrency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
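The scheduler described above may run two tasks concurrently only when
their declared effects do not interfere. A toy version of that check
under the usual reads/writes reading of effects (the Task class and
task names are invented; the paper's system is an extension of Java):

    class Task:
        def __init__(self, name, reads=(), writes=()):
            self.name, self.reads, self.writes = name, set(reads), set(writes)

    def interferes(a, b):
        # conflict iff one task writes what the other reads or writes
        return (a.writes & (b.reads | b.writes)) or (b.writes & a.reads)

    t1 = Task("render",  reads={"scene"})
    t2 = Task("physics", reads={"scene"}, writes={"positions"})
    t3 = Task("log",     reads={"positions"})

    print(bool(interferes(t1, t2)))  # False: may run concurrently
    print(bool(interferes(t2, t3)))  # True: must be serialized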
@Article{Bonetta:2013:TPE,
author = "Daniele Bonetta and Walter Binder and Cesare
Pautasso",
title = "{TigerQuoll}: parallel event-based {JavaScript}",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "251--260",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442541",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "JavaScript, the most popular language on the Web, is
rapidly moving to the server-side, becoming even more
pervasive. Still, JavaScript lacks support for shared
memory parallelism, making it challenging for
developers to exploit multicores present in both
servers and clients. In this paper we present
TigerQuoll, a novel API and runtime for parallel
programming in JavaScript. TigerQuoll features an
event-based API and a parallel runtime allowing
applications to exploit a mutable shared memory space.
The programming model of TigerQuoll features automatic
consistency and concurrency management, such that
developers do not have to deal with shared-data
synchronization. TigerQuoll supports an innovative
transaction model that allows for eventual consistency
to speed up high-contention workloads. Experiments show
that TigerQuoll applications scale well, allowing one
to implement common parallelism patterns in
JavaScript.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dice:2013:UHT,
author = "Dave Dice and Yossi Lev and Yujie Liu and Victor
Luchangco and Mark Moir",
  title =        "Using hardware transactional memory to correct and
                 simplify a readers-writer lock algorithm",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "261--270",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442542",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Designing correct synchronization algorithms is
notoriously difficult, as evidenced by a bug we have
identified that has apparently gone unnoticed in a
well-known synchronization algorithm for nearly two
decades. We use hardware transactional memory (HTM) to
construct a corrected version of the algorithm. This
version is significantly simpler than the original and
furthermore improves on it by eliminating usage
constraints and reducing space requirements.
Performance of the HTM-based algorithm is competitive
with the original in ``normal'' conditions, but it does
suffer somewhat under heavy contention. We successfully
apply some optimizations to help close this gap, but we
also find that they are incompatible with known
techniques for improving progress properties. We
discuss ways in which future HTM implementations may
address these issues. Finally, although our focus is on
how effectively HTM can correct and simplify the
algorithm, we also suggest bug fixes and workarounds
that do not depend on HTM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cascaval:2013:ZPW,
author = "Calin Cascaval and Seth Fowler and Pablo
Montesinos-Ortego and Wayne Piekarski and Mehrdad
Reshadi and Behnam Robatmili and Michael Weber and
Vrajesh Bhavsar",
title = "{ZOOMM}: a parallel {Web} browser engine for multicore
mobile devices",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "271--280",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442543",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "We explore the challenges in expressing and managing
concurrency in browsers on mobile devices. Browsers are
complex applications that implement multiple standards,
need to support legacy behavior, and are highly dynamic
and interactive. We present ZOOMM, a highly concurrent
                 web browser engine prototype, and show how concurrency
                 is effectively exploited at different levels: to speed
                 up computation, to preload network resources, and to
                 preprocess resources outside the critical path of page
                 loading. On a dual-core Android mobile device we
demonstrate that ZOOMM is two times faster than the
native WebKit based browser when loading the set of
pages defined in the Vellamo benchmark.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Grasso:2013:APS,
author = "Ivan Grasso and Klaus Kofler and Biagio Cosenza and
Thomas Fahringer",
title = "Automatic problem size sensitive task partitioning on
heterogeneous parallel systems",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "281--282",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442545",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
  abstract =     "In this paper we propose a novel approach that
                 automates task partitioning in heterogeneous systems.
Our framework is based on the Insieme Compiler and
Runtime infrastructure. The compiler translates a
single-device OpenCL program into a multi-device OpenCL
program. The runtime system then performs dynamic task
partitioning based on an offline-generated prediction
model. In order to derive the prediction model, we use
a machine learning approach that incorporates static
program features as well as dynamic, input sensitive
features. Our approach has been evaluated over a suite
                 of 23 programs and achieves performance improvements
                 compared to executing the benchmarks on a single CPU
                 or a single GPU alone.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liu:2013:DLO,
author = "Jun Liu and Wei Ding and Ohyoung Jang and Mahmut
Kandemir",
title = "Data layout optimization for {GPGPU} architectures",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "283--284",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442546",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "GPUs are being widely used in accelerating
general-purpose applications, leading to the emergence
of GPGPU architectures. New programming models, e.g.,
Compute Unified Device Architecture (CUDA), have been
proposed to facilitate programming general-purpose
computations in GPGPUs. However, writing
high-performance CUDA codes manually is still tedious
and difficult. In particular, the organization of the
data in the memory space can greatly affect the
performance due to the unique features of a custom
GPGPU memory hierarchy. In this work, we propose an
automatic data layout transformation framework to solve
the key issues associated with a GPGPU memory hierarchy
(i.e., channel skewing, data coalescing, and bank
conflicts). Our approach employs a widely applicable
strategy based on a novel concept called data
localization. Specifically, we try to optimize the
layout of the arrays accessed in affine loop nests, for
both the device memory and shared memory, at both
coarse grain and fine grain parallelization levels. We
performed an experimental evaluation of our data layout
optimization strategy using 15 benchmarks on an NVIDIA
CUDA GPU device. The results show that the proposed
data transformation approach brings around 4.3X speedup
on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
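One standard layout transformation in this space, and a plausible
illustration of why layout matters for coalescing, is switching from
array-of-structs to struct-of-arrays so that consecutive threads touch
consecutive addresses. A NumPy sketch of that general idea (not the
paper's framework):

    import numpy as np

    n = 8
    # array-of-structs: the fields of one element are adjacent, so
    # thread i reading field "x" strides by the struct size (uncoalesced)
    aos = np.zeros(n, dtype=[("x", np.float32), ("y", np.float32),
                             ("z", np.float32)])

    # struct-of-arrays: one contiguous array per field, so threads
    # 0..n-1 reading "x" touch consecutive addresses (coalesced)
    soa = {name: np.ascontiguousarray(aos[name]) for name in ("x", "y", "z")}

    soa["x"] += 1.0          # a whole-field update is one contiguous sweep
    print(soa["x"])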
@Article{Padmanabhan:2013:DTO,
author = "Shobana Padmanabhan and Yixin Chen and Roger D.
Chamberlain",
title = "Decomposition techniques for optimal design-space
exploration of streaming applications",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "285--286",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442547",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Streaming data programs are an important class of
applications, for which queueing network models are
frequently available. While the design space can be
large, decomposition techniques can be effective at
design space reduction. We introduce two decomposition
techniques called convex decomposition and unchaining
and present implications for a biosequence search
application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Yu:2013:EDA,
author = "Xiaodong Yu and Michela Becchi",
title = "Exploring different automata representations for
efficient regular expression matching on {GPUs}",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "287--288",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442548",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Regular expression matching is a central task in
several networking (and search) applications and has
been accelerated on a variety of parallel
architectures. All solutions are based on finite
automata (either in deterministic or non-deterministic
form), and mostly focus on effective memory
                 representations for such automata. Recently, a handful
                 of works have proposed efficient regular expression
                 matching designs for GPUs; however, most of them aim at
achieving good performance on small datasets. Nowadays,
practical solutions must support the increased size and
complexity of real world datasets. In this work, we
explore the deployment and optimization of different
GPU designs of regular expression matching engines,
focusing on large datasets containing a large number of
complex patterns.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Edmonds:2013:EGA,
author = "Nick Edmonds and Jeremiah Willcock and Andrew
Lumsdaine",
title = "Expressing graph algorithms using generalized active
messages",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "289--290",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442549",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Recently, graph computation has emerged as an
important class of high-performance computing
application whose characteristics differ markedly from
those of traditional, compute-bound, kernels. Libraries
such as BLAS, LAPACK, and others have been successful
in codifying best practices in numerical computing. The
data-driven nature of graph applications necessitates a
more complex application stack incorporating runtime
optimization. In this paper, we present a method of
phrasing graph algorithms as collections of
asynchronous, concurrently executing, concise code
fragments which may be invoked both locally and in
remote address spaces. A runtime layer performs a
number of dynamic optimizations, including message
coalescing, message combining, and software routing.
Practical implementations and performance results are
provided for a number of representative algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lu:2013:MLP,
author = "Ligang Lu and Karen Magerlein",
title = "Multi-level parallel computing of reverse time
migration for seismic imaging on {Blue Gene/Q}",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "291--292",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442550",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Blue Gene/Q (BG/Q) is an early representative of
increasing scale and thread count that will
characterize future HPC systems: large counts of nodes,
cores, and threads; and a rich programming environment
with many degrees of freedom in parallel computing
                 optimization. So it is both a challenge and an
                 opportunity to exploit it to accelerate seismic imaging
                 applications to the unprecedented levels that will
                 significantly advance the technologies for the oil and
                 gas industry. In this work we aim to address two
important questions: how HPC systems with high levels
of scale and thread count will perform in real
applications; and how systems with many degrees of
freedom in parallel programming can be calibrated to
achieve optimal performance. Based on BG/Q's
architecture features and RTM workload characteristics,
                 we developed massive domain partitioning, MPI, and
                 SIMD optimizations. Our detailed analyses of various
                 aspects of the optimization also provide valuable
                 experience and insights into how such systems can be
                 utilized to facilitate the advance of seismic imaging
                 technologies. Our BG/Q RTM
solution achieved a 14.93x speedup over the BG/P
implementation. Our multi-level parallelism strategies
for Reverse Time Migration (RTM) seismic imaging
                 computing on BG/Q provide an example of how HPC
systems like BG/Q can accelerate applications to a new
level.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Park:2013:PPB,
author = "Changhee Park and Guy L. {Steele, Jr.} and
Jean-Baptiste Tristan",
title = "Parallel programming with big operators",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "293--294",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442551",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "In the sciences, it is common to use the so-called
``big operator'' notation to express the iteration of a
binary operator (the reducer) over a collection of
values. Such a notation typically assumes that the
reducer is associative and abstracts the iteration
process. Consequently, from a programming
point-of-view, we can organize the reducer operations
to minimize the depth of the overall reduction,
allowing a potentially parallel evaluation of a big
operator expression. We believe that the big operator
notation is indeed an effective construct to express
parallel computations in the Generate/Map/Reduce
programming model, and our goal is to introduce it in
programming languages to support parallel programming.
The effective definition of such a big operator
expression requires a simple way to generate elements,
and a simple way to declare algebraic properties of the
reducer (such as its identity, or its commutativity).
In this poster, we want to present an extension of
Scala with support for big operator expressions. We
show how big operator expressions are defined and how
the API is organized to support the simple definition
of reducers with their algebraic properties.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
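Because the reducer is assumed associative, a big-operator expression
can be evaluated as a balanced tree of depth O(log n) rather than as a
left-to-right fold, which is what exposes the parallelism. A sketch of
that tree-shaped reduction (the big helper is invented for
illustration; the paper's host language is Scala):

    import operator

    def big(reduce_op, identity, elements):
        # tree-shaped reduction: O(log n) depth for an associative
        # reduce_op, so the subtrees could be evaluated in parallel
        vals = list(elements)
        if not vals:
            return identity
        while len(vals) > 1:
            pairs = zip(vals[0::2], vals[1::2])
            combined = [reduce_op(a, b) for a, b in pairs]
            if len(vals) % 2:
                combined.append(vals[-1])   # carry the odd element over
            vals = combined
        return vals[0]

    print(big(operator.add, 0, range(10)))     # 45, like sum(range(10))
    print(big(operator.mul, 1, [1, 2, 3, 4]))  # 24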
@Article{Afek:2013:PHL,
author = "Yehuda Afek and Amir Levy and Adam Morrison",
title = "Programming with hardware lock elision",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "295--296",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442552",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "We present a simple yet effective technique for
improving performance of lock-based code using the
hardware lock elision (HLE) feature in Intel's upcoming
Haswell processor. We also describe how to extend
Haswell's HLE mechanism to achieve a similar effect to
our lock elision scheme entirely in hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lu:2013:REM,
author = "Kai Lu and Xu Zhou and Xiaoping Wang and Wenzhe Zhang
and Gen Li",
title = "{RaceFree}: an efficient multi-threading model for
determinism",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "297--298",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442553",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Current deterministic systems generally incur large
overhead due to the difficulty of detecting and
eliminating data races. This paper presents RaceFree, a
novel multi-threading runtime that adopts a relaxed
deterministic model to provide a data-race-free
environment for parallel programs. This model cuts off
                 unnecessary shared-memory communication by isolating
                 threads in separate memories, which eliminates direct
data races. Meanwhile, we leverage the happen-before
relation defined by applications themselves as one-way
communication pipes to perform necessary thread
communication. Shared-memory communication is
transparently converted to message-passing style
communication by our Memory Modification Propagation
(MMP) mechanism, which propagates local memory
modifications to other threads through the
happen-before relation pipes. The overhead of RaceFree
is 67.2\% according to our tests on parallel
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shun:2013:RCT,
author = "Julian Shun and Guy E. Blelloch and Jeremy T. Fineman
and Phillip B. Gibbons",
title = "Reducing contention through priority updates",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "299--300",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442554",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
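A priority update writes to a shared location only when the new value
has higher priority, so under contention most attempts take a
read-only fast path instead of writing. A sketch of the operation,
with a lock standing in for the compare-and-swap loop the technique is
normally built on:

    import threading

    class PriorityCell:
        def __init__(self, value):
            self.value = value
            self._lock = threading.Lock()   # stand-in for a CAS loop

        def priority_update(self, new, higher=lambda a, b: a < b):
            if not higher(new, self.value): # read-only fast path: most
                return False                # contending calls end here
            with self._lock:                # slow path: actually write
                if higher(new, self.value):
                    self.value = new
                    return True
                return False

    cell = PriorityCell(10)
    print(cell.priority_update(7))   # True: 7 has higher priority
    print(cell.priority_update(9))   # False: no write, no contention
    print(cell.value)                # 7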
@Article{Diamos:2013:RAM,
author = "Gregory Diamos and Haicheng Wu and Jin Wang and Ashwin
Lele and Sudhakar Yalamanchili",
title = "Relational algorithms for multi-bulk-synchronous
processors",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "301--302",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442555",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Relational databases remain an important application
infrastructure for organizing and analyzing massive
volumes of data. At the same time, processor
architectures are increasingly gravitating towards
Multi-Bulk-Synchronous processor (Multi-BSP)
architectures employing throughput-optimized memory
systems, lightweight multi-threading, and
Single-Instruction Multiple-Data (SIMD) core
organizations. This paper explores the mapping of
primitive relational algebra operations onto such
architectures to improve the throughput of data
warehousing applications built on relational
databases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Carvalho:2013:RET,
author = "Fernando Miguel Carvalho and Jo{\~a}o Cachopo",
title = "Runtime elision of transactional barriers for captured
memory",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "303--304",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442556",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
  abstract =     "In this paper, we propose a new technique that can
                 identify transaction-local memory (i.e., captured
                 memory) in managed environments, while incurring a low
                 runtime overhead. We implemented our proposal in a
                 well-known
STM framework (Deuce) and we tested it in STMBench7
with two different STMs: TL2 and LSA. In both STMs the
performance improved significantly (4 times and 2.6
times, respectively). Moreover, running the STAMP
benchmarks with our approach shows improvements of 7
times in the best case for the Vacation application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Park:2013:SDR,
author = "Chang-Seo Park and Koushik Sen and Costin Iancu",
title = "Scalable data race detection for partitioned global
address space programs",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "305--306",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442557",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Contemporary and future programming languages for HPC
promote hybrid parallelism and shared memory
abstractions using a global address space. In this
programming style, data races occur easily and are
notoriously hard to find. Previous work on data race
detection for shared memory programs reports 10X-100X
slowdowns for non-scientific programs. Previous work on
distributed memory programs instruments only
communication operations. In this paper we present the
first complete implementation of data race detection at
scale for UPC programs. Our implementation tracks local
and global memory references in the program and it uses
two techniques to reduce the overhead: (1) hierarchical
function and instruction level sampling; and (2)
exploiting the runtime persistence of aliasing and
locality specific to Partitioned Global Address Space
applications. The results indicate that both techniques
are required in practice: well optimized instruction
sampling introduces overheads as high as 6500\% (65X
slowdown), while each technique in separation is able
to reduce it to 1000\% (10X slowdown). When applying
the optimizations in conjunction our tool finds all
previously known data races in our benchmark programs
with at most 50\% overhead. Furthermore, while previous
results illustrate the benefits of function level
sampling, our experiences show that this technique does
not work for scientific programs: instruction sampling
or a hybrid approach is required.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dice:2013:SSC,
author = "Dave Dice and Yossi Lev and Mark Moir",
title = "Scalable statistics counters",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "307--308",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442558",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Naive statistics counters that are commonly used to
monitor system events and performance become a
                 scalability bottleneck as systems become larger and
                 more NUMA; furthermore, some are so inaccurate that
                 they are not useful. We present a number of techniques to
address these problems, evaluating solutions in terms
of performance, scalability, space overhead, and
accuracy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
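One standard way to make such a counter scale, though not necessarily
the paper's exact scheme, is striping: each thread increments a
private slot and a read sums the slots, trading read cost for
contention-free updates. A Python sketch:

    import threading

    class StripedCounter:
        # each thread increments its own stripe; reads sum all stripes
        def __init__(self):
            self._local = threading.local()
            self._stripes = []
            self._lock = threading.Lock()

        def increment(self):
            stripe = getattr(self._local, "stripe", None)
            if stripe is None:
                stripe = [0]
                with self._lock:            # registration is rare
                    self._stripes.append(stripe)
                self._local.stripe = stripe
            stripe[0] += 1                  # the common, uncontended path

        def read(self):
            return sum(s[0] for s in self._stripes)

    c = StripedCounter()
    threads = [threading.Thread(target=lambda: [c.increment()
                                                for _ in range(1000)])
               for _ in range(4)]
    for t in threads: t.start()
    for t in threads: t.join()
    print(c.read())                         # 4000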
@Article{Wozniak:2013:SSD,
author = "Justin M. Wozniak and Timothy G. Armstrong and Michael
Wilde and Daniel S. Katz and Ewing Lusk and Ian T.
Foster",
title = "{Swift/T}: scalable data flow programming for
many-task applications",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "309--310",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442559",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Swift/T, a novel programming language implementation
for highly scalable data flow programs, is presented.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cai:2013:TST,
author = "Yan Cai and Ke Zhai and Shangru Wu and W. K. Chan",
title = "{TeamWork}: synchronizing threads globally to detect
real deadlocks for multithreaded programs",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "311--312",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442560",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "This paper presents the aim of TeamWork, our ongoing
effort to develop a comprehensive dynamic deadlock
confirmation tool for multithreaded programs. It also
presents a refined object abstraction algorithm that
refines the existing stack hash abstraction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{ElMehdiDiouri:2013:TEE,
author = "Mohammed {El Mehdi Diouri} and Olivier Gl{\"u}ck and
Laurent Lef{\`e}vre and Franck Cappello",
title = "Towards an energy estimator for fault tolerance
protocols",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "313--314",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442561",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Checkpointing protocols have different energy
consumption depending on parameters like application
features and platform characteristics. To select a
protocol for a given execution, we propose an energy
estimator that relies on an energy calibration of the
considered platform and a user description of the
execution settings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wimmer:2013:WSC,
author = "Martin Wimmer and Daniel Cederman and Jesper Larsson
Tr{\"a}ff and Philippas Tsigas",
title = "Work-stealing with configurable scheduling
strategies",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "315--316",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442562",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Work-stealing systems are typically oblivious to the
nature of the tasks they are scheduling. They do not
know or take into account how long a task will take to
execute or how many subtasks it will spawn. Moreover,
task execution order is typically determined by an
underlying task storage data structure, and cannot be
changed. There are thus possibilities for optimizing
task parallel executions by providing information on
specific tasks and their preferred execution order to
the scheduling system. We investigate generalizations
of work-stealing and introduce a framework enabling
applications to dynamically provide hints on the nature
of specific tasks using scheduling strategies.
Strategies can be used to independently control both
local task execution and steal order. Strategies allow
optimizations on specific tasks, in contrast to more
conventional scheduling policies that are typically
global in scope. Strategies are composable and allow
different, specific scheduling choices for different
parts of an application simultaneously. We have
implemented a work-stealing system based on our
strategy framework. A series of benchmarks demonstrates
beneficial effects that can be achieved with scheduling
strategies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhou:2013:WED,
author = "Bowen Zhou and Milind Kulkarni and Saurabh Bagchi",
title = "{WuKong}: effective diagnosis of bugs at large system
scales",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "317--318",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442563",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "A key challenge in developing large scale applications
(both in system size and in input size) is finding bugs
that are latent at the small scales of testing, only
manifesting when a program is deployed at large scales.
Traditional statistical techniques fail because no
error-free run is available at deployment scales for
training purposes. Prior work used scaling models to
detect anomalous behavior at large scales without being
trained on correct behavior at that scale. However,
that work cannot localize bugs automatically. In this
paper, we extend that work in three ways: (i) we
develop an automatic diagnosis technique, based on
feature reconstruction; (ii) we design a heuristic to
effectively prune the feature space; and (iii) we
validate our design through one fault-injection study,
finding that our system can effectively localize bugs
in a majority of cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Norell:2013:IPD,
author = "Ulf Norell",
title = "Interactive programming with dependent types",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "1--2",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500610",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "In dependently typed languages, run-time values can
appear in types, making it possible to give programs
more precise types than in languages without dependent
types. This can range from keeping track of simple
invariants like the length of a list, to full
functional correctness. In addition to having some
correctness guarantees on the final program, assigning
more precise types to programs means that you can get
more assistance from the type checker while writing
them. This is what I focus on here, demonstrating how
the programming environment of Agda can help you when
developing dependently typed programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Traytel:2013:VDP,
author = "Dmitriy Traytel and Tobias Nipkow",
title = "Verified decision procedures for {MSO} on words based
on derivatives of regular expressions",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "3--12",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500612",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Monadic second-order logic on finite words (MSO) is a
decidable yet expressive logic into which many decision
problems can be encoded. Since MSO formulas correspond
to regular languages, equivalence of MSO formulas can
be reduced to the equivalence of some regular
structures (e.g. automata). This paper presents a
verified functional decision procedure for MSO formulas
that is not based on automata but on regular
expressions. Functional languages are ideally suited
for this task: regular expressions are data types and
functions on them are defined by pattern matching and
recursion and are verified by structural induction.
Decision procedures for regular expression equivalence
have been formalized before, usually based on
Brzozowski derivatives. Yet, for a straightforward
embedding of MSO formulas into regular expressions an
extension of regular expressions with a projection
operation is required. We prove total correctness and
completeness of an equivalence checker for regular
expressions extended in that way. We also define a
language-preserving translation of formulas into
regular expressions with respect to two different
semantics of MSO. Our results have been formalized and
verified in the theorem prover Isabelle. Using
Isabelle's code generation facility, this yields purely
functional, formally verified programs that decide
equivalence of MSO formulas.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
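The Brzozowski derivatives underlying this decision procedure are
directly executable: the derivative of r with respect to a character c
accepts exactly the words w such that cw is in L(r). A small Python
sketch for the basic operators (without the projection extension the
paper adds):

    # regular expressions as nested tuples: ("empty",) | ("eps",) |
    # ("chr", c) | ("alt", r, s) | ("seq", r, s) | ("star", r)

    def nullable(r):
        tag = r[0]
        if tag in ("eps", "star"): return True
        if tag in ("empty", "chr"): return False
        if tag == "alt": return nullable(r[1]) or nullable(r[2])
        if tag == "seq": return nullable(r[1]) and nullable(r[2])

    def deriv(r, c):
        tag = r[0]
        if tag in ("empty", "eps"): return ("empty",)
        if tag == "chr": return ("eps",) if r[1] == c else ("empty",)
        if tag == "alt": return ("alt", deriv(r[1], c), deriv(r[2], c))
        if tag == "seq":
            d = ("seq", deriv(r[1], c), r[2])
            return ("alt", d, deriv(r[2], c)) if nullable(r[1]) else d
        if tag == "star": return ("seq", deriv(r[1], c), r)

    def matches(r, word):
        for c in word:          # take one derivative per character
            r = deriv(r, c)
        return nullable(r)      # accept iff the residual accepts ""

    ab_star = ("star", ("seq", ("chr", "a"), ("chr", "b")))  # (ab)*
    print(matches(ab_star, "abab"))  # True
    print(matches(ab_star, "aba"))   # False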
@Article{Broadbent:2013:CSC,
author = "Christopher Broadbent and Arnaud Carayol and Matthew
Hague and Olivier Serre",
title = "{C-SHORe}: a collapsible approach to higher-order
verification",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "13--24",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500589",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Higher-order recursion schemes (HORS) have recently
received much attention as a useful abstraction of
higher-order functional programs with a number of new
verification techniques employing HORS model-checking
as their centrepiece. This paper contributes to the
ongoing quest for a truly scalable model-checker for
HORS by offering a different, automata theoretic
perspective. We introduce the first practical
model-checking algorithm that acts on a generalisation
of pushdown automata equi-expressive with HORS called
collapsible pushdown systems (CPDS). At its core is a
substantial modification of a recently studied
saturation algorithm for CPDS. In particular it is able
to use information gathered from an approximate forward
reachability analysis to guide its backward search.
Moreover, we introduce an algorithm that prunes the
CPDS prior to model-checking and a method for
extracting counter-examples in negative instances. We
compare our tool with the state-of-the-art verification
tools for HORS and obtain encouraging results. In
contrast to some of the main competition tackling the
same problem, our algorithm is fixed-parameter
tractable, and we also offer significantly improved
performance over the only previously published tool of
which we are aware that also enjoys this property. The
tool and additional material are available from
http://cshore.cs.rhul.ac.uk.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Petersen:2013:ASV,
author = "Leaf Petersen and Dominic Orchard and Neal Glew",
title = "Automatic {SIMD} vectorization for {Haskell}",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "25--36",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500605",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Expressing algorithms using immutable arrays greatly
simplifies the challenges of automatic SIMD
vectorization, since several important classes of
dependency violations cannot occur. The Haskell
programming language provides libraries for programming
with immutable arrays, and compiler support for
optimizing them to eliminate the overhead of
intermediate temporary arrays. We describe an
implementation of automatic SIMD vectorization in a
Haskell compiler which gives substantial vector
speedups for a range of programs written in a natural
programming style. We compare performance with that of
programs compiled by the Glasgow Haskell Compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Mainland:2013:EVI,
author = "Geoffrey Mainland and Roman Leshchinskiy and Simon
Peyton Jones",
title = "Exploiting vector instructions with generalized stream
fusion",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "37--48",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500601",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Stream fusion is a powerful technique for
automatically transforming high-level
sequence-processing functions into efficient
implementations. It has been used to great effect in
Haskell libraries for manipulating byte arrays, Unicode
text, and unboxed vectors. However, some operations,
like vector append, still do not perform well within
the standard stream fusion framework. Others, like SIMD
computation using the SSE and AVX instructions
available on modern x86 chips, do not seem to fit in
the framework at all. In this paper we introduce
generalized stream fusion, which solves these issues.
The key insight is to bundle together multiple stream
representations, each tuned for a particular class of
stream consumer. We also describe a stream
representation suited for efficient computation with
SSE instructions. Our ideas are implemented in modified
versions of the GHC compiler and vector library.
Benchmarks show that high-level Haskell code written
using our compiler and libraries can produce code that
is faster than both compiler- and hand-vectorized C.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
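In stream fusion, a sequence is represented as a state plus a step
function, so combinators compose into one loop with no intermediate
arrays. A minimal Python rendering of the classic single-representation
version that the paper generalizes (Haskell is the paper's setting):

    # a stream is (state, step); step returns ("yield", x, s'),
    # ("skip", s'), or ("done",)
    def from_list(xs):
        def step(i):
            return ("yield", xs[i], i + 1) if i < len(xs) else ("done",)
        return (0, step)

    def smap(f, stream):
        s0, step = stream
        def step2(s):
            r = step(s)
            return ("yield", f(r[1]), r[2]) if r[0] == "yield" else r
        return (s0, step2)       # no intermediate list is ever built

    def ssum(stream):
        s, step = stream
        total = 0
        while True:              # the fused loop: map and sum run together
            r = step(s)
            if r[0] == "done":
                return total
            if r[0] == "yield":
                total += r[1]
            s = r[-1]            # advance to the next state

    print(ssum(smap(lambda x: x * x, from_list([1, 2, 3, 4]))))  # 30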
@Article{McDonell:2013:OPF,
author = "Trevor L. McDonell and Manuel M. T. Chakravarty and
Gabriele Keller and Ben Lippmeier",
title = "Optimising purely functional {GPU} programs",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "49--60",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500595",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Purely functional, embedded array programs are a good
match for SIMD hardware, such as GPUs. However, the
naive compilation of such programs quickly leads to
both code explosion and an excessive use of
intermediate data structures. The resulting slow-down
is not acceptable on target hardware that is usually
chosen to achieve high performance. In this paper, we
discuss two optimisation techniques, sharing recovery
and array fusion, that tackle code explosion and
eliminate superfluous intermediate structures. Both
techniques are well known from other contexts, but they
present unique challenges for an embedded language
compiled for execution on a GPU. We present novel
methods for implementing sharing recovery and array
fusion, and demonstrate their effectiveness on a set of
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Bernardy:2013:TTC,
author = "Jean-Philippe Bernardy and Moulin Guilhem",
title = "Type-theory in color",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "61--72",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500577",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dependent type-theory aims to become the standard way
to formalize mathematics at the same time as displacing
traditional platforms for high-assurance programming.
However, current implementations of type theory are
still lacking, in the sense that some obvious truths
require explicit proofs, making type-theory awkward to
use for many applications, both in formalization and
programming. In particular, notions of erasure are
poorly supported. In this paper we propose an extension
of type-theory with colored terms, color erasure and
interpretation of colored types as predicates. The
result is a more powerful type-theory: some definitions
and proofs may be omitted as they become trivial, it
becomes easier to program with precise types, and some
parametricity results can be internalized.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Devriese:2013:TSM,
author = "Dominique Devriese and Frank Piessens",
title = "Typed syntactic meta-programming",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "73--86",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500575",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a novel set of meta-programming primitives
for use in a dependently-typed functional language. The
types of our meta-programs provide strong and precise
guarantees about their termination, correctness and
completeness. Our system supports type-safe
construction and analysis of terms, types and typing
contexts. Unlike alternative approaches, our
meta-programs are written in the same style as normal
programs and use
the language's standard functional computational model.
We formalise the new meta-programming primitives,
implement them as an extension of Agda, and provide
evidence of usefulness by means of two compelling
applications in the fields of datatype-generic
programming and proof tactics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Ziliani:2013:MMT,
author = "Beta Ziliani and Derek Dreyer and Neelakantan R.
Krishnaswami and Aleksandar Nanevski and Viktor
Vafeiadis",
title = "{Mtac}: a monad for typed tactic programming in
{Coq}",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "87--100",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500579",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Effective support for custom proof automation is
essential for large scale interactive proof
development. However, existing languages for automation
via *tactics* either (a) provide no way to specify the
behavior of tactics within the base logic of the
accompanying theorem prover, or (b) rely on advanced
type-theoretic machinery that is not easily integrated
into established theorem provers. We present Mtac, a
lightweight but powerful extension to Coq that supports
dependently-typed tactic programming. Mtac tactics have
access to all the features of ordinary Coq programming,
as well as a new set of typed tactical primitives. We
avoid the need to touch the trusted kernel typechecker
of Coq by encapsulating uses of these new tactical
primitives in a *monad*, and instrumenting Coq so that
it executes monadic tactics during type inference.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Dolan:2013:FSF,
author = "Stephen Dolan",
title = "Fun with semirings: a functional pearl on the abuse of
linear algebra",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "101--110",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500613",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Describing a problem using classical linear algebra is
a very well-known problem-solving technique. If your
question can be formulated as a question about real or
complex matrices, then the answer can often be found by
standard techniques. It's less well-known that very
similar techniques still apply where instead of real or
complex numbers we have a closed semiring, which is a
structure with some analogue of addition and
multiplication that need not support subtraction or
division. We define a typeclass in Haskell for
describing closed semirings, and implement a few
functions for manipulating matrices and polynomials
over them. We then show how these functions can be used
to calculate transitive closures, find shortest or
longest or widest paths in a graph, analyse the data
flow of imperative programs, optimally pack knapsacks,
and perform discrete event simulations, all by just
providing an appropriate underlying closed semiring.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
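A minimal sketch of the typeclass described in the abstract,
instantiated with the tropical (min, +) semiring that yields
shortest paths; the class and instance names here are ours,
not necessarily the paper's.

    -- A closed semiring: addition, multiplication, and a closure
    -- operation satisfying  closure x = one <+> (x <.> closure x).
    class Semiring a where
      zero, one    :: a
      (<+>), (<.>) :: a -> a -> a

    class Semiring a => ClosedSemiring a where
      closure :: a -> a

    -- Shortest-path distances: <+> takes the better (shorter) of
    -- two routes, <.> concatenates routes by adding their lengths.
    data Dist = Dist Int | Inf deriving (Eq, Show)

    instance Semiring Dist where
      zero = Inf
      one  = Dist 0
      Inf    <+> d      = d
      d      <+> Inf    = d
      Dist a <+> Dist b = Dist (min a b)
      Inf    <.> _      = Inf
      _      <.> Inf    = Inf
      Dist a <.> Dist b = Dist (a + b)

    instance ClosedSemiring Dist where
      closure _ = Dist 0   -- looping never shortens a non-negative route

Matrices over Dist with a semiring-valued closure then give
all-pairs shortest paths, and swapping the underlying semiring
swaps the problem being solved, as the abstract's list of
applications suggests.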
@Article{Bernardy:2013:EDC,
author = "Jean-Philippe Bernardy and Koen Claessen",
title = "Efficient divide-and-conquer parsing of practical
context-free languages",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "111--122",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500576",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a divide-and-conquer algorithm for parsing
context-free languages efficiently. Our algorithm is an
instance of Valiant's (1975), which reduced the problem
of parsing to matrix multiplication. We show that, while
the conquer step of Valiant's algorithm is O(n$^3$) in
the worst case, it improves to O(log$^3$ n) under
certain conditions satisfied by many useful inputs.
These conditions occur for example in program texts
written by humans. The improvement happens because the
multiplications involve an overwhelming majority of
empty matrices. This result is relevant to modern
computing: divide-and-conquer algorithms can be
parallelized relatively easily.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Mairson:2013:FGT,
author = "Harry George Mairson",
title = "Functional geometry and the {Trait{\'e} de Lutherie}:
functional pearl",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "123--132",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500617",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a functional programming approach to the
design of outlines of eighteenth-century string
instruments. The approach is based on the research
described in Fran{\c{c}}ois Denis's book, Trait{\'e} de
lutherie. The programming vernacular for Denis's
instructions, which we call functional geometry, is
meant to reiterate the historically justified language
and techniques of this musical instrument design. The
programming metaphor is entirely Euclidean, involving
straightedge and compass constructions, with few (if
any) numbers, and no Cartesian equations or grid. As
such, it is also an interesting approach to teaching
programming and mathematics without numerical
calculation or equational reasoning. The advantage of
this language-based, functional approach to lutherie is
founded in the abstract characterization of common
patterns in instrument design. These patterns include
not only the abstraction of common straightedge and
compass constructions, but of higher-order
conceptualization of the instrument design process. We
also discuss the role of arithmetic, geometric,
harmonic, and subharmonic proportions, and the use of
their rational approximants.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Brady:2013:PRA,
author = "Edwin Brady",
title = "Programming and reasoning with algebraic effects and
dependent types",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "133--144",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500581",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "One often-cited benefit of pure functional programming
is that pure code is easier to test and reason about,
both formally and informally. However, real programs
have side-effects including state management,
exceptions and interactions with the outside world.
Haskell solves this problem using monads to capture
details of possibly side-effecting computations --- it
provides monads for capturing state, I/O, exceptions,
non-determinism, libraries for practical purposes such
as CGI and parsing, and many others, as well as monad
transformers for combining multiple effects.
Unfortunately, useful as monads are, they do not
compose very well. Monad transformers can quickly
become unwieldy when there are lots of effects to
manage, leading to a temptation in larger programs to
combine everything into one coarse-grained state and
exception monad. In this paper I describe an
alternative approach based on handling algebraic
effects, implemented in the IDRIS programming language.
I show how to describe side effecting computations, how
to write programs which compose multiple fine-grained
effects, and how, using dependent types, we can use
this approach to reason about states in effectful
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Kammar:2013:HA,
author = "Ohad Kammar and Sam Lindley and Nicolas Oury",
title = "Handlers in action",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "145--158",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500590",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Plotkin and Pretnar's handlers for algebraic effects
occupy a sweet spot in the design space of abstractions
for effectful computation. By separating effect
signatures from their implementation, algebraic effects
provide a high degree of modularity, allowing
programmers to express effectful programs independently
of the concrete interpretation of their effects. A
handler is an interpretation of the effects of an
algebraic computation. The handler abstraction adapts
well to multiple settings: pure or impure, strict or
lazy, static types or dynamic types. This is a position
paper whose main aim is to popularise the handler
abstraction. We give a gentle introduction to its use,
a collection of illustrative examples, and a
straightforward operational semantics. We describe our
Haskell implementation of handlers in detail, outline
the ideas behind our OCaml, SML, and Racket
implementations, and present experimental results
comparing handlers with existing code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
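The handler idea can be conveyed with a free monad in Haskell;
this is a simplified sketch in the spirit of the paper's
Haskell implementation, not its actual library.

    -- A computation tree over an effect signature f.
    data Free f a = Pure a | Op (f (Free f a))

    instance Functor f => Functor (Free f) where
      fmap f (Pure a) = Pure (f a)
      fmap f (Op op)  = Op (fmap (fmap f) op)

    instance Functor f => Applicative (Free f) where
      pure = Pure
      Pure f <*> m = fmap f m
      Op op  <*> m = Op (fmap (<*> m) op)

    instance Functor f => Monad (Free f) where
      Pure a >>= k = k a
      Op op  >>= k = Op (fmap (>>= k) op)

    -- The signature of a state effect, kept separate from any
    -- implementation of it.
    data StateF s k = Get (s -> k) | Put s k

    instance Functor (StateF s) where
      fmap f (Get k)   = Get (f . k)
      fmap f (Put s k) = Put s (f k)

    get :: Free (StateF s) s
    get = Op (Get Pure)

    put :: s -> Free (StateF s) ()
    put s = Op (Put s (Pure ()))

    -- A handler: one concrete interpretation of Get and Put.
    runState :: Free (StateF s) a -> s -> (a, s)
    runState (Pure a)        s = (a, s)
    runState (Op (Get k))    s = runState (k s) s
    runState (Op (Put s' k)) _ = runState k s'

A different handler for the same signature (logging every Put,
say) reinterprets the same programs without changing them,
which is the modularity the abstract emphasizes.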
@Article{Jones:2013:CSS,
author = "Simon Peyton Jones",
title = "Computer science as a school subject",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "159--160",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500609",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computer science is one of the richest, most exciting
disciplines on the planet, yet any teenager will tell
you that ICT (as it is called in UK schools ---
``information and communication technology'') is
focused almost entirely on the use and application of
computers, and in practice covers nothing about how
computers work, nor programming, nor anything of the
discipline of computer science as we understand it.
Over the last two decades, computing at school has
drifted from writing adventure games on the BBC Micro
to writing business plans in Excel. This is bad for our
young people's education, and it is bad for our
economy. Nor is this phenomenon restricted to the UK:
many countries are struggling with the same issues. Our
young people should be educated not only in the
application and use of digital technology, but also in
how it works, and its foundational principles. Lacking
such knowledge renders them powerless in the face of
complex and opaque technology, disenfranchises them
from making informed decisions about the digital
society, and deprives our nations of a well-qualified
stream of students enthusiastic and able to envision
and design new digital systems. Can anything be done,
given the enormous inertia of our various countries'
educational systems? Sometimes, yes. After a decade of
stasis, change has come to the UK. Over the last 18
months, there has been a wholesale reform of the
English school computing curriculum, and substantial
movement in Scotland and Wales. It now seems likely
that computer science will, for the first time, become
part of every child's education. This change has been
driven not by institutions or by the government, but by
a grass-roots movement of parents, teachers, university
academics, software developers, and others. A key agent
in this grass-roots movement---although not the only
one---is the Computing At School Working Group (CAS).
In this talk I will describe how CAS was born and
developed, and the radical changes that have taken
place since in the UK. I hope that this may be
encouraging for those pushing water uphill in other
parts of the world, and I will also try to draw out
some lessons from our experience that may be useful to
others.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Schmidt-Schauss:2013:CSH,
author = "Manfred Schmidt-Schau{\ss} and David Sabel",
title = "Correctness of an {STM} {Haskell} implementation",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "161--172",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500585",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A concurrent implementation of software transactional
memory in Concurrent Haskell using a call-by-need
functional language with processes and futures is
given. The description of the small-step operational
semantics is precise and explicit, and employs an early
abort of conflicting transactions. A proof of
correctness of the implementation is given for a
contextual semantics with may- and should-convergence.
This implies that our implementation is a correct
evaluator for an abstract specification equipped with a
big-step semantics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
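For readers unfamiliar with the object of study, here is a
small example of the STM Haskell interface whose
implementation the paper proves correct; it uses the standard
Control.Concurrent.STM API and is illustrative only.

    import Control.Concurrent.STM

    -- Transfer between two accounts; the transaction retries
    -- (blocks) until the source balance suffices, then commits
    -- both writes atomically.
    transfer :: TVar Int -> TVar Int -> Int -> STM ()
    transfer from to n = do
      balance <- readTVar from
      if balance < n
        then retry
        else do writeTVar from (balance - n)
                modifyTVar' to (+ n)

    main :: IO ()
    main = do
      a <- newTVarIO 100
      b <- newTVarIO 0
      atomically (transfer a b 40)
      print =<< atomically ((,) <$> readTVar a <*> readTVar b)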
@Article{Pottier:2013:PPM,
author = "Fran{\c{c}}ois Pottier and Jonathan Protzenko",
title = "Programming with permissions in {Mezzo}",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "173--184",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500598",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Mezzo, a typed programming language of ML
lineage. Mezzo is equipped with a novel static
discipline of duplicable and affine permissions, which
controls aliasing and ownership. This rules out certain
mistakes, including representation exposure and data
races, and enables new idioms, such as gradual
initialization, memory re-use, and (type)state changes.
Although the core static discipline disallows sharing a
mutable data structure, Mezzo offers several ways of
working around this restriction, including a novel
dynamic ownership control mechanism which we dub
``adoption and abandon''.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Abel:2013:WRC,
author = "Andreas M. Abel and Brigitte Pientka",
title = "Wellfounded recursion with copatterns: a unified
approach to termination and productivity",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "185--196",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500591",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we study strong normalization of a core
language based on System F-omega which supports
programming with finite and infinite structures.
Building on our prior work, finite data such as finite
lists and trees are defined via constructors and
manipulated via pattern matching, while infinite data
such as streams and infinite trees is defined by
observations and synthesized via copattern matching. In
this work, we take a type-based approach to strong
normalization by tracking size information about finite
and infinite data in the type. This guarantees
compositionality. More importantly, the duality of
patterns and copatterns provides a unifying semantic
concept which allows us for the first time to elegantly
and uniformly support both well-founded induction and
coinduction by mere rewriting. The strong normalization
proof is structured around Girard's reducibility
candidates. As such our system allows for
non-determinism and does not rely on coverage. Since
System F-omega is general enough that it can be the
target of compilation for the Calculus of
Constructions, this work is a significant step towards
representing observation-centric infinite data in proof
assistants such as Coq and Agda.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
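Copatterns are not available in Haskell, but the
observation-centric view of infinite data can be approximated
with a record of observations; a rough illustration only,
without the productivity checking the paper's type system
provides.

    -- A stream is whatever answers the two observations
    -- shead and stail.
    data Stream a = Stream { shead :: a, stail :: Stream a }

    -- Defined "by observations", copattern-style: we say what
    -- each observation returns rather than building a finite value.
    nats :: Stream Integer
    nats = go 0
      where go n = Stream { shead = n, stail = go (n + 1) }

    zipWithS :: (a -> b -> c) -> Stream a -> Stream b -> Stream c
    zipWithS f xs ys =
      Stream { shead = f (shead xs) (shead ys)
             , stail = zipWithS f (stail xs) (stail ys) }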
@Article{Atkey:2013:PCG,
author = "Robert Atkey and Conor McBride",
title = "Productive coprogramming with guarded recursion",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "197--208",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500597",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Total functional programming offers the beguiling
vision that, just by virtue of the compiler accepting a
program, we are guaranteed that it will always
terminate. In the case of programs that are not
intended to terminate, e.g., servers, we are guaranteed
that programs will always be productive. Productivity
means that, even if a program generates an infinite
amount of data, each piece will be generated in finite
time. The theoretical underpinning for productive
programming with infinite output is provided by the
category theoretic notion of final coalgebras. Hence,
we speak of coprogramming with non-well-founded codata,
as a dual to programming with well-founded data
like finite lists and trees. Systems that offer
facilities for productive coprogramming, such as the
proof assistants Coq and Agda, currently do so through
syntactic guardedness checkers. Syntactic guardedness
checkers ensure that all self-recursive calls are
guarded by a use of a constructor. Such a check ensures
productivity. Unfortunately, these syntactic checks are
not compositional, and severely complicate
coprogramming. Guarded recursion, originally due to
Nakano, is tantalising as a basis for a flexible and
compositional type-based approach to coprogramming.
However, as we show, by itself, guarded recursion is
not suitable for coprogramming due to the fact that
there is no way to make finite observations on pieces
of infinite data. In this paper, we introduce the
concept of clock variables that index Nakano's guarded
recursion. Clock variables allow us to ``close over''
the generation of infinite data, and to make finite
observations, something that is not possible with
guarded recursion alone.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Hinze:2013:USR,
author = "Ralf Hinze and Nicolas Wu and Jeremy Gibbons",
title = "Unifying structured recursion schemes",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "209--220",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500578",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Folds over inductive datatypes are well understood and
widely used. In their plain form, they are quite
restricted; but many disparate generalisations have
been proposed that enjoy similar calculational
benefits. There have also been attempts to unify the
various generalisations: two prominent such
unifications are the 'recursion schemes from comonads'
of Uustalu, Vene and Pardo, and our own 'adjoint
folds'. Until now, these two unified schemes have
appeared incompatible. We show that this appearance is
illusory: in fact, adjoint folds subsume recursion
schemes from comonads. The proof of this claim involves
standard constructions in category theory that are
nevertheless not well known in functional programming:
Eilenberg-Moore categories and bialgebras.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
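The plain fold that the paper's unification generalises can be
stated in a few lines; a standard textbook rendering, not code
from the paper.

    -- The fixed point of a functor, and the catamorphism
    -- (fold) over it.
    newtype Fix f = In { out :: f (Fix f) }

    cata :: Functor f => (f a -> a) -> Fix f -> a
    cata alg = alg . fmap (cata alg) . out

    -- Lists as a fixed point, and summation as an algebra.
    data ListF e r = NilF | ConsF e r

    instance Functor (ListF e) where
      fmap _ NilF        = NilF
      fmap f (ConsF e r) = ConsF e (f r)

    toFix :: [Int] -> Fix (ListF Int)
    toFix = foldr (\x r -> In (ConsF x r)) (In NilF)

    sumList :: Fix (ListF Int) -> Int
    sumList = cata alg
      where alg NilF        = 0
            alg (ConsF e r) = e + r

Adjoint folds and recursion schemes from comonads both
generalise cata by interposing extra structure around the
functor f; the paper shows the former subsumes the latter.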
@Article{Krishnaswami:2013:HOF,
author = "Neelakantan R. Krishnaswami",
title = "Higher-order functional reactive programming without
spacetime leaks",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "221--232",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500588",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Functional reactive programming (FRP) is an elegant
approach to declaratively specify reactive systems.
However, the powerful abstractions of FRP have
historically made it difficult to predict and control
the resource usage of programs written in this style.
In this paper, we give a new language for higher-order
reactive programming. Our language generalizes and
simplifies prior type systems for reactive programming,
by supporting the use of streams of streams,
first-class functions, and higher-order operations. We
also support many temporal operations beyond streams,
such as terminatable streams, events, and even
resumptions with first-class schedulers. Furthermore,
our language supports an efficient implementation
strategy permitting us to eagerly deallocate old values
and statically rule out spacetime leaks, a notorious
source of inefficiency in reactive programs.
Furthermore, these memory guarantees are achieved
without the use of a complex substructural type
discipline. We also show that our implementation
strategy of eager deallocation is safe, by showing the
soundness of our type system with a novel step-indexed
Kripke logical relation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Jeffrey:2013:FRP,
author = "Alan Jeffrey",
title = "Functional reactive programming with liveness
guarantees",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "233--244",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500584",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Functional Reactive Programming (FRP) is an approach
to the development of reactive systems which provides a
pure functional interface, but which may be implemented
as an abstraction of an imperative event-driven layer.
FRP systems typically provide a model of behaviours
(total time-indexed values, implemented as pull
systems) and event sources (partial time-indexed
values, implemented as push systems). In this paper, we
investigate a type system for event-driven FRP programs
which provide liveness guarantees, that is every input
event is guaranteed to generate an output event. We
show that FRP can be implemented on top of a model of
sets and relations, and that the isomorphism between
event sources and behaviours corresponds to the
isomorphism between relations and set-valued functions.
We then implement sets and relations using a model of
continuations using the usual double-negation CPS
transform. The implementation of behaviours as pull
systems based on futures, and of event sources as push
systems based on the observer pattern, thus arises from
first principles. We also discuss a Java implementation
of the FRP model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Morihata:2013:SCP,
author = "Akimasa Morihata",
title = "A short cut to parallelization theorems",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "245--256",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500580",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The third list-homomorphism theorem states that if a
function is both foldr and foldl, it has a
divide-and-conquer parallel implementation as well. In
this paper, we develop a theory for obtaining such
parallelization theorems. The key is a new proof of the
third list-homomorphism theorem based on shortcut
deforestation. The proof implies that there exists a
divide-and-conquer parallel program of the form of h (
x ' merge ' y ) = h$_1$ x odot h$_2$ y, where h is the
subject of parallelization, merge is the operation of
integrating independent substructures, h$_1$ and h$_2$
are computations applied to substructures, possibly in
parallel, and odot merges the results calculated for
substructures, if (i) h can be specified by two certain
forms of iterative programs, and (ii) merge can be
implemented by a function of a certain polymorphic
type. Therefore, when requirement (ii) is fulfilled, h
has a divide-and-conquer implementation if h has two
certain forms of implementations. We show that our
approach is applicable to structure-consuming
operations by catamorphisms (folds),
structure-generating operations by anamorphisms
(unfolds), and their generalizations called
hylomorphisms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
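A concrete instance of the theorem's statement: sum is both a
foldr and a foldl, so it also has a divide-and-conquer form;
an elementary illustration, not the paper's derivation
machinery.

    -- sum = foldr (+) 0 = foldl (+) 0, so by the third
    -- list-homomorphism theorem it splits across sublists.
    sumDC :: [Int] -> Int
    sumDC []  = 0
    sumDC [x] = x
    sumDC xs  = sumDC l + sumDC r   -- the halves could run in parallel
      where (l, r) = splitAt (length xs `div` 2) xs

Here merge is (++) and the combining operation $\odot$ is (+);
the paper's contribution is a systematic way of deriving such
decompositions from the two sequential definitions.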
@Article{Axelsson:2013:UCP,
author = "Emil Axelsson and Koen Claessen",
title = "Using circular programs for higher-order syntax:
functional pearl",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "257--262",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500614",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This pearl presents a novel technique for constructing
a first-order syntax tree directly from a higher-order
interface. We exploit circular programming to generate
names for new variables, resulting in a simple yet
efficient method. Our motivating application is the
design of embedded languages supporting variable
binding, where it is convenient to use higher-order
syntax when constructing programs, but first-order
syntax when processing or transforming programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
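The pearl's trick can be conveyed in a few lines; this sketch
is in the spirit of the paper (names may differ). The binder
chosen for a lambda is one more than the largest binder used
in its own body, a circular definition that laziness resolves
because computing maxBV never forces the name stored at a Var
or the body under a Lam.

    data Exp = Var Int | App Exp Exp | Lam Int Exp
      deriving Show

    -- Higher-order interface: build first-order syntax from a
    -- Haskell function.
    lam :: (Exp -> Exp) -> Exp
    lam f = Lam n body
      where
        body = f (Var n)        -- body mentions n ...
        n    = maxBV body + 1   -- ... and n is computed from body

    -- Largest binder occurring in a term; ignores Var payloads
    -- and never looks under a Lam, which makes lam productive.
    maxBV :: Exp -> Int
    maxBV (Var _)   = 0
    maxBV (App f a) = maxBV f `max` maxBV a
    maxBV (Lam n _) = n

For example, lam (\x -> lam (\y -> App x y)) yields closed
first-order syntax with distinct names for the two binders.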
@Article{Balabonski:2013:WOM,
author = "Thibaut Balabonski",
title = "Weak optimality, and the meaning of sharing",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "263--274",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500606",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper we investigate laziness and optimal
evaluation strategies for functional programming
languages. We consider the weak lambda-calculus as a
basis of functional programming languages, and we adapt
to this setting the concepts of optimal reductions that
were defined for the full lambda-calculus. We prove
that the usual implementation of call-by-need using
sharing is optimal, that is, normalizing any
lambda-term with call-by-need requires exactly the same
number of reduction steps as the shortest reduction
sequence in the weak lambda-calculus without sharing.
Furthermore, we prove that optimal reduction sequences
without sharing are not computable. Hence sharing is
the only computable means to reach weak optimality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Weirich:2013:SFE,
author = "Stephanie Weirich and Justin Hsu and Richard A.
Eisenberg",
title = "System {FC} with explicit kind equality",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "275--286",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500599",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "System FC, the core language of the Glasgow Haskell
Compiler, is an explicitly-typed variant of System F
with first-class type equality proofs called coercions.
This extensible proof system forms the foundation for
type system extensions such as type families
(type-level functions) and Generalized Algebraic
Datatypes (GADTs). Such features, in conjunction with
kind polymorphism and datatype promotion, support
expressive compile-time reasoning. However, the core
language lacks explicit kind equality proofs. As a
result, type-level computation does not have access to
kind-level functions or promoted GADTs, the type-level
analogues to expression-level features that have been
so useful. In this paper, we eliminate such
discrepancies by introducing kind equalities to System
FC. Our approach is based on dependent type systems
with heterogeneous equality and the ``Type-in-Type''
axiom, yet it preserves the metatheoretic properties of
FC. In particular, type checking is simple, decidable
and syntax directed. We prove the preservation and
progress theorems for the extended language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Sculthorpe:2013:CMP,
author = "Neil Sculthorpe and Jan Bracker and George Giorgidze
and Andy Gill",
title = "The constrained-monad problem",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "287--298",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500602",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In Haskell, there are many data types that would form
monads were it not for the presence of type-class
constraints on the operations on that data type. This
is a frustrating problem in practice, because there is
a considerable amount of support and infrastructure for
monads that these data types cannot use. Using several
examples, we show that a monadic computation can be
restructured into a normal form such that the standard
monad class can be used. The technique is not specific
to monads, and we show how it can also be applied to
other structures, such as applicative functors. One
significant use case for this technique is
domain-specific languages, where it is often desirable
to compile a deep embedding of a computation to some
other language, which requires restricting the types
that can appear in that computation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
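The normal form described in the abstract can be sketched as a
GADT; a minimal sketch under our own names (NM, liftNM,
lowerSet), not the paper's library.

    {-# LANGUAGE GADTs, ConstraintKinds, KindSignatures #-}

    import Data.Kind (Constraint, Type)
    import qualified Data.Set as Set

    -- Computations in normal form: every Bind is on a primitive
    -- t-value whose element type satisfies the constraint c. The
    -- Monad instance needs no constraints at all, so the standard
    -- monad infrastructure applies.
    data NM (c :: Type -> Constraint) (t :: Type -> Type) a where
      Return :: a -> NM c t a
      Bind   :: c x => t x -> (x -> NM c t a) -> NM c t a

    instance Functor (NM c t) where
      fmap f (Return a)  = Return (f a)
      fmap f (Bind tx k) = Bind tx (fmap f . k)

    instance Applicative (NM c t) where
      pure = Return
      mf <*> ma = mf >>= \f -> fmap f ma

    instance Monad (NM c t) where
      Return a  >>= k = k a
      Bind tx h >>= k = Bind tx (\x -> h x >>= k)

    liftNM :: c a => t a -> NM c t a
    liftNM tx = Bind tx Return

    -- Set has no Monad instance (its bind needs Ord), but
    -- NM Ord Set does; the constraint is discharged only when
    -- we lower back to Set.
    lowerSet :: Ord a => NM Ord Set.Set a -> Set.Set a
    lowerSet (Return a)  = Set.singleton a
    lowerSet (Bind tx k) =
      Set.unions [ lowerSet (k x) | x <- Set.toList tx ]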
@Article{Svenningsson:2013:SCR,
author = "Josef David Svenningsson and Bo Joel Svensson",
title = "Simple and compositional reification of monadic
embedded languages",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "299--304",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500611",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When writing embedded domain specific languages in
Haskell, it is often convenient to be able to make an
instance of the Monad class to take advantage of the
do-notation and the extensive monad libraries. Commonly
it is desirable to compile such languages rather than
just interpret them. This introduces the problem of
monad reification, i.e. observing the structure of the
monadic computation. We present a solution to the monad
reification problem and illustrate it with a small
robot control language. Monad reification is not new
but the novelty of our approach is in its directness,
simplicity and compositionality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Hidaka:2013:SRQ,
author = "Soichiro Hidaka and Kazuyuki Asada and Zhenjiang Hu
and Hiroyuki Kato and Keisuke Nakano",
title = "Structural recursion for querying ordered graphs",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "305--318",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500608",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Structural recursion, in the form of, for example,
folds on lists and catamorphisms on algebraic data
structures including trees, plays an important role in
functional programming, by providing a systematic way
for constructing and manipulating functional programs.
It is, however, a challenge to define structural
recursions for graph data structures, the most
ubiquitous sort of data in computing. This is because
unlike lists and trees, graphs are essentially not
inductive and cannot be formalized as an initial
algebra in general. In this paper, we borrow from the
database community the idea of structural recursion,
and of how to restrict recursions on infinite unordered
regular trees so that they preserve the finiteness
property and terminate, which are desirable
properties for query languages. We propose a new graph
transformation language called lambdaFG for
transforming and querying ordered graphs, based on the
well-defined bisimulation relation on ordered graphs
with special epsilon-edges. The language lambdaFG is a
higher order graph transformation language that extends
the simply typed lambda calculus with graph
constructors and more powerful structural recursions,
which is extended for transformations on the sibling
dimension. It not only gives a general framework for
manipulating graphs and reasoning about them, but also
provides a solution to the open problem of how to
define a structural recursion on ordered graphs, with
the help of the bisimilarity for ordered graphs with
epsilon-edges.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Delaware:2013:MMM,
author = "Benjamin Delaware and Steven Keuchel and Tom
Schrijvers and Bruno C.d.S. Oliveira",
title = "Modular monadic meta-theory",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "319--330",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500587",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents 3MT, a framework for modular
mechanized meta-theory of languages with effects. Using
3MT, individual language features and their
corresponding definitions --- semantic functions,
theorem statements and proofs --- can be built separately
and then reused to create different languages with
fully mechanized meta-theory. 3MT combines modular
datatypes and monads to define denotational semantics
with effects on a per-feature basis, without fixing the
particular set of effects or language constructs. One
well-established problem with type soundness proofs for
denotational semantics is that they are notoriously
brittle with respect to the addition of new effects.
The statement of type soundness for a language depends
intimately on the effects it uses, making it
particularly challenging to achieve modularity. 3MT
solves this long-standing problem by splitting these
theorems into two separate and reusable parts: a
feature theorem that captures the well-typing of
denotations produced by the semantic function of an
individual feature with respect to only the effects
used, and an effect theorem that adapts well-typings of
denotations to a fixed superset of effects. The proof
of type soundness for a particular language simply
combines these theorems for its features and the
combination of their effects. To establish both
theorems, 3MT uses two key reasoning techniques:
modular induction and algebraic laws about effects.
Several effectful language features, including
references and errors, illustrate the capabilities of
3MT. A case study reuses these features to build fully
mechanized definitions and proofs for 28 languages,
including several versions of mini-ML with effects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Lorenzen:2013:MAT,
author = "Florian Lorenzen and Sebastian Erdweg",
title = "Modular and automated type-soundness verification for
language extensions",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "331--342",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500596",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Language extensions introduce high-level programming
constructs that protect programmers from low-level
details and repetitive tasks. For such an abstraction
barrier to be sustainable, it is important that no
errors are reported in terms of generated code. A
typical strategy is to check the original user code
prior to translation into a low-level encoding,
applying the assumption that the translation does not
introduce new errors. Unfortunately, such an assumption
is untenable in general, and in particular in the context
of extensible programming languages, such as Racket or
SugarJ, that allow regular programmers to define
language extensions. In this paper, we present a
formalism for building and automatically verifying the
type-soundness of syntactic language extensions. To
build a type-sound language extension with our
formalism, a developer declares an extended syntax,
type rules for the extended syntax, and translation
rules into the (possibly further extended) base
language. Our formalism then validates that the
user-defined type rules are sufficient to guarantee
that the code generated by the translation rules cannot
contain any type errors. This effectively ensures that
an initial type check prior to translation precludes
type errors in generated code. We have implemented a
core system in PLT Redex and we have developed a
syntactically extensible variant of System F$_\omega$ that
we extend with let notation, monadic do blocks, and
algebraic data types. Our formalism verifies the
soundness of each extension automatically.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Keep:2013:NFC,
author = "Andrew W. Keep and R. Kent Dybvig",
title = "A nanopass framework for commercial compiler
development",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "343--350",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500618",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Contemporary compilers must typically handle
sophisticated high-level source languages, generate
efficient code for multiple hardware architectures and
operating systems, and support source-level debugging,
profiling, and other program development tools. As a
result, compilers tend to be among the most complex of
software systems. Nanopass frameworks are designed to
help manage this complexity. A nanopass compiler is
comprised of many single-task passes with formally
defined intermediate languages. The perceived downside
of a nanopass compiler is that the extra passes will
lead to substantially longer compilation times. To
determine whether this is the case, we have created a
plug replacement for the commercial Chez Scheme
compiler, implemented using an updated nanopass
framework, and we have compared the speed of the new
compiler and the code it generates against the original
compiler for a large set of benchmark programs. This
paper describes the updated nanopass framework, the new
compiler, and the results of our experiments. The
compiler produces faster code than the original,
averaging 15--27\% depending on architecture and
optimization level, due to a more sophisticated but
slower register allocator and improvements to several
optimizations. Compilation times average well within a
factor of two of the original compiler, despite the
slower register allocator and the replacement of five
passes of the original 10 with over 50 nanopasses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
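The nanopass discipline itself is language-agnostic; here is a
toy Haskell rendering of the idea (many single-task passes,
each with its own small intermediate language), not the
framework's Scheme API.

    -- Source IR with syntactic sugar, and a smaller IR without it.
    data L0 = Num0 Int | Add0 L0 L0 | Double0 L0
    data L1 = Num1 Int | Add1 L1 L1

    -- Nanopass 1: remove the Double0 form.
    desugar :: L0 -> L1
    desugar (Num0 n)    = Num1 n
    desugar (Add0 a b)  = Add1 (desugar a) (desugar b)
    desugar (Double0 e) = let e' = desugar e in Add1 e' e'

    -- Nanopass 2: fold constants within the smaller IR.
    foldConsts :: L1 -> L1
    foldConsts (Add1 a b) =
      case (foldConsts a, foldConsts b) of
        (Num1 x, Num1 y) -> Num1 (x + y)
        (a', b')         -> Add1 a' b'
    foldConsts e = e

    compile :: L0 -> L1
    compile = foldConsts . desugar

Each pass is small enough to state its invariant precisely,
which is the maintainability argument the abstract makes; the
paper's experiments address the compile-time cost of composing
dozens of such passes.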
@Article{St-Amour:2013:ERA,
author = "Vincent St-Amour and Neil Toronto",
title = "Experience report: applying random testing to a base
type environment",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "351--356",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500616",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As programmers, programming in typed languages
increases our confidence in the correctness of our
programs. As type system designers, soundness proofs
increase our confidence in the correctness of our type
systems. There is more to typed languages than their
typing rules, however. To be usable, a typed language
needs to provide a well-furnished standard library and
to specify types for its exports. As software
artifacts, these base type environments can rival
typecheckers in complexity. Our experience with the
Typed Racket base environment---which accounts for 31\%
of the code in the Typed Racket
implementation---teaches us that writing type
environments can be just as error-prone as writing
typecheckers. We report on our experience over the past
two years of using random testing to increase our
confidence in the correctness of the Typed Racket base
environment.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
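The flavour of random testing the authors apply, shown on a
toy property; this uses the real Test.QuickCheck API but is
not one of the Typed Racket properties from the paper.

    import Test.QuickCheck

    -- A falsifiable specification: QuickCheck searches for
    -- random counterexamples instead of requiring a proof.
    prop_revApp :: [Int] -> [Int] -> Bool
    prop_revApp xs ys = reverse (xs ++ ys) == reverse ys ++ reverse xs

    main :: IO ()
    main = quickCheck prop_revApp

For a base type environment, the analogous property is that a
value produced by an exported binding actually inhabits its
declared type, checked on randomly generated inputs.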
@Article{Petersen:2013:ERF,
author = "Christian L. Petersen and Matthias Gorges and Dustin
Dunsmuir and Mark Ansermino and Guy A. Dumont",
title = "Experience report: functional programming of {mHealth}
applications",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "357--362",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500615",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A modular framework for the development of medical
applications that promotes deterministic, robust and
correct code is presented. The system is based on the
portable Gambit Scheme programming language and
provides a flexible cross-platform environment for
developing graphical applications on mobile devices as
well as medical instrumentation interfaces running on
embedded platforms. Real world applications of this
framework for mobile diagnostics, telemonitoring and
automated drug infusions are reported. The source code
for the core framework is open source and available at:
https://github.com/part-cw/lambdanative.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Delbianco:2013:HSR,
author = "Germ{\'a}n Andr{\'e}s Delbianco and Aleksandar
Nanevski",
title = "{Hoare}-style reasoning with (algebraic)
continuations",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "363--376",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500593",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Continuations are programming abstractions that allow
for manipulating the ``future'' of a computation.
Amongst their many applications, they enable
implementing unstructured program flow through
higher-order control operators such as callcc. In this
paper we develop a Hoare-style logic for the
verification of programs with higher-order control, in
the presence of dynamic state. This is done by
designing a dependent type theory with first class
callcc and abort operators, where pre- and
postconditions of programs are tracked through types.
Our operators are algebraic in the sense of Plotkin and
Power, and Jaskelioff, to reduce the annotation burden
and enable verification by symbolic evaluation. We
illustrate working with the logic by verifying a number
of characteristic examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
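For readers unfamiliar with callcc, a small Haskell example
using the standard Control.Monad.Cont API; the paper's setting
is a dependent type theory, where pre- and postconditions
additionally track such jumps.

    import Control.Monad.Cont

    -- Early exit with a captured continuation: stop multiplying
    -- as soon as a zero is encountered.
    productC :: [Int] -> Int
    productC xs = runCont (callCC (\exit -> go exit xs)) id
      where
        go _    []     = return 1
        go exit (0:_)  = exit 0   -- jump out, discarding the rest
        go exit (y:ys) = fmap (y *) (go exit ys)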
@Article{Turon:2013:URH,
author = "Aaron Turon and Derek Dreyer and Lars Birkedal",
title = "Unifying refinement and {Hoare}-style reasoning in a
logic for higher-order concurrency",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "377--390",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500600",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modular programming and modular verification go hand
in hand, but most existing logics for concurrency
ignore two crucial forms of modularity: *higher-order
functions*, which are essential for building reusable
components, and *granularity abstraction*, a key
technique for hiding the intricacies of fine-grained
concurrent data structures from the clients of those
data structures. In this paper, we present CaReSL, the
first logic to support the use of granularity
abstraction for modular verification of higher-order
concurrent programs. After motivating the features of
CaReSL through a variety of illustrative examples, we
demonstrate its effectiveness by using it to tackle a
significant case study: the first formal proof of
(partial) correctness for Hendler et al.'s ``flat
combining'' algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Chlipala:2013:BSP,
author = "Adam Chlipala",
title = "The bedrock structured programming system: combining
generative metaprogramming and {Hoare} logic in an
extensible program verifier",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "391--402",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500592",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We report on the design and implementation of an
extensible programming language and its intrinsic
support for formal verification. Our language is
targeted at low-level programming of infrastructure
like operating systems and runtime systems. It is based
on a cross-platform core combining characteristics of
assembly languages and compiler intermediate languages.
From this foundation, we take literally the saying that
C is a ``macro assembly language'': we introduce an
expressive notion of certified low-level macros,
sufficient to build up the usual features of C and
beyond as macros with no special support in the core.
Furthermore, our macros have integrated support for
strongest postcondition calculation and verification
condition generation, so that we can provide a
high-productivity formal verification environment
within Coq for programs composed from any combination
of macros. Our macro interface is expressive enough to
support features that low-level programs usually only
access through external tools with no formal
guarantees, such as declarative parsing or SQL-inspired
querying. The abstraction level of these macros only
imposes a compile-time cost, via the execution of
functional Coq programs that compute programs in our
intermediate language; but the run-time cost is not
substantially greater than for more conventional C
code. We describe our experiences constructing a full
C-like language stack using macros, with some
experiments on the verifiability and performance of
individual programs running on that stack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Cheney:2013:PTL,
author = "James Cheney and Sam Lindley and Philip Wadler",
title = "A practical theory of language-integrated query",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "403--416",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500586",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Language-integrated query is receiving renewed
attention, in part because of its support through
Microsoft's LINQ framework. We present a practical
theory of language-integrated query based on quotation
and normalisation of quoted terms. Our technique
supports join queries, abstraction over values and
predicates, composition of queries, dynamic generation
of queries, and queries with nested intermediate data.
Higher-order features prove useful even for
constructing first-order queries. We prove a theorem
characterising when a host query is guaranteed to
generate a single SQL query. We present experimental
results confirming our technique works, even in
situations where Microsoft's LINQ framework either
fails to produce an SQL query or, in one case, produces
an avalanche of SQL queries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Garcia:2013:CTB,
author = "Ronald Garcia",
title = "Calculating threesomes, with blame",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "417--428",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500603",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Coercions and threesomes both enable a language to
combine static and dynamic types while avoiding
cast-based space leaks. Coercion calculi elegantly
specify space-efficient cast behavior, even when
augmented with blame tracking, but implementing their
semantics directly is difficult. Threesomes, on the
other hand, have a straightforward recursive
implementation, but endowing them with blame tracking
is challenging. In this paper, we show that you can use
that elegant spec to produce that straightforward
implementation: we use the coercion calculus to derive
threesomes with blame. In particular, we construct
novel threesome calculi for blame tracking strategies
that detect errors earlier, catch more errors, and
reflect an intuitive conception of safe and unsafe
casts based on traditional subtyping.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Dunfield:2013:CEB,
author = "Joshua Dunfield and Neelakantan R. Krishnaswami",
title = "Complete and easy bidirectional typechecking for
higher-rank polymorphism",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "429--442",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500582",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Bidirectional typechecking, in which terms either
synthesize a type or are checked against a known type,
has become popular for its scalability (unlike
Damas-Milner type inference, bidirectional typing
remains decidable even for very expressive type
systems), its error reporting, and its relative ease of
implementation. Following design principles from proof
theory, bidirectional typing can be applied to many
type constructs. The principles underlying a
bidirectional approach to polymorphism, however, are
less obvious. We give a declarative, bidirectional
account of higher-rank polymorphism, grounded in proof
theory; this calculus enjoys many properties such as
eta-reduction and predictability of annotations. We
give an algorithm for implementing the declarative
system; our algorithm is remarkably simple and
well-behaved, despite being both sound and complete.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Johnson:2013:OAA,
author = "J. Ian Johnson and Nicholas Labich and Matthew Might
and David {Van Horn}",
title = "Optimizing abstract abstract machines",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "443--454",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500604",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The technique of abstracting abstract machines (AAM)
provides a systematic approach for deriving computable
approximations of evaluators that are easily proved
sound. This article contributes a complementary
step-by-step process for subsequently going from a
naive analyzer derived under the AAM approach, to an
efficient and correct implementation. The end result of
the process is a two to three order-of-magnitude
improvement over the systematically derived analyzer,
making it competitive with hand-optimized
implementations that compute fundamentally less precise
results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Hritcu:2013:TNQ,
author = "Catalin Hritcu and John Hughes and Benjamin C. Pierce
and Antal Spector-Zabusky and Dimitrios Vytiniotis and
Arthur Azevedo de Amorim and Leonidas Lampropoulos",
title = "Testing noninterference, quickly",
journal = j-SIGPLAN,
volume = "48",
number = "9",
pages = "455--468",
month = sep,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544174.2500574",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 27 18:32:10 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Information-flow control mechanisms are difficult to
design and labor intensive to prove correct. To reduce
the time wasted on proof attempts doomed to fail due to
broken definitions, we advocate modern random testing
techniques for finding counterexamples during the
design process. We show how to use QuickCheck, a
property-based random-testing tool, to guide the design
of a simple information-flow abstract machine. We find
that both sophisticated strategies for generating
well-distributed random programs and readily
falsifiable formulations of noninterference properties
are critically important. We propose several approaches
and evaluate their effectiveness on a collection of
injected bugs of varying subtlety. We also present an
effective technique for shrinking large counterexamples
to minimal, easily comprehensible ones. Taken together,
our best methods enable us to quickly and automatically
generate simple counterexamples for all these bugs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '13 conference proceedings.",
}
@Article{Meyerovich:2013:EAP,
author = "Leo A. Meyerovich and Ariel S. Rabkin",
title = "Empirical analysis of programming language adoption",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "1--18",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509515",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Some programming languages become widely popular while
others fail to grow beyond their niche or disappear
altogether. This paper uses survey methodology to
identify the factors that lead to language adoption. We
analyze large datasets, including over 200,000
SourceForge projects, 590,000 projects tracked by
Ohloh, and multiple surveys of 1,000--13,000
programmers. We report several prominent findings.
First, language adoption follows a power law; a small
number of languages account for most language use, but
the programming market supports many languages with
niche user bases. Second, intrinsic features have only
secondary importance in adoption. Open source
libraries, existing code, and experience strongly
influence developers when selecting a language for a
project. Language features such as performance,
reliability, and simple semantics do not. Third,
developers will steadily learn and forget languages.
The overall number of languages developers are familiar
with is independent of age. Finally, when considering
intrinsic aspects of languages, developers prioritize
expressivity over correctness. They perceive static
types as primarily helping with the latter, hence
partly explaining the popularity of dynamic
languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Li:2013:SSE,
author = "You Li and Zhendong Su and Linzhang Wang and Xuandong
Li",
title = "Steering symbolic execution to less traveled paths",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "19--32",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509553",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Symbolic execution is a promising testing and analysis
methodology. It systematically explores a program's
execution space and can generate test cases with high
coverage. One significant practical challenge for
symbolic execution is how to effectively explore the
enormous number of program paths in real-world
programs. Various heuristics have been proposed for
guiding symbolic execution, but they are generally
inefficient and ad-hoc. In this paper, we introduce a
novel, unified strategy to guide symbolic execution to
less explored parts of a program. Our key idea is to
exploit a specific type of path spectra, namely the
length-n subpath program spectra, to systematically
approximate full path information for guiding path
exploration. In particular, we use frequency
distributions of explored length-n subpaths to
prioritize ``less traveled'' parts of the program to
improve test coverage and error detection. We have
implemented our general strategy in KLEE, a
state-of-the-art symbolic execution engine. Evaluation
results on the GNU Coreutils programs show that (1)
varying the length n captures program-specific
information and exhibits different degrees of
effectiveness, and (2) our general approach outperforms
traditional strategies in both coverage and error
detection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Carbin:2013:VQR,
author = "Michael Carbin and Sasa Misailovic and Martin C.
Rinard",
title = "Verifying quantitative reliability for programs that
execute on unreliable hardware",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "33--52",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509546",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Emerging high-performance architectures are
anticipated to contain unreliable components that may
exhibit soft errors, which silently corrupt the results
of computations. Full detection and masking of soft
errors is challenging, expensive, and, for some
applications, unnecessary. For example, approximate
computing applications (such as multimedia processing,
machine learning, and big data analytics) can often
naturally tolerate soft errors. We present Rely, a
programming language that enables developers to reason
about the quantitative reliability of an application
--- namely, the probability that it produces the
correct result when executed on unreliable hardware.
Rely allows developers to specify the reliability
requirements for each value that a function produces.
We present a static quantitative reliability analysis
that verifies quantitative requirements on the
reliability of an application, enabling a developer to
perform sound and verified reliability engineering. The
analysis takes a Rely program with a reliability
specification and a hardware specification that
characterizes the reliability of the underlying
hardware components and verifies that the program
satisfies its reliability specification when executed
on the underlying unreliable hardware platform. We
demonstrate the application of quantitative reliability
analysis on six computations implemented in Rely.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Huang:2013:ECS,
author = "Jipeng Huang and Michael D. Bond",
title = "Efficient context sensitivity for dynamic analyses via
calling context uptrees and customized memory
management",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "53--72",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509510",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "State-of-the-art dynamic bug detectors such as data
race and memory leak detectors report program locations
that are likely causes of bugs. However, programmers
need more than static program locations to understand
the behavior of increasingly complex and concurrent
software. Dynamic calling context provides additional
information, but it is expensive to record calling
context frequently, e.g., at every read and write.
Context-sensitive dynamic analyses can build and
maintain a calling context tree (CCT) to track calling
context--but in order to reuse existing nodes,
CCT-based approaches require an expensive lookup. This
paper introduces a new approach for context sensitivity
that avoids this expensive lookup. The approach uses a
new data structure called the calling context uptree
(CCU) that adds low overhead by avoiding the lookup and
instead allocating a new node for each context. A key
contribution is that the approach can mitigate the
costs of allocating many nodes by extending tracing
garbage collection (GC): GC collects unused CCU nodes
naturally and efficiently, and we extend GC to merge
duplicate nodes lazily. We implement our CCU-based
approach in a high-performance Java virtual machine and
integrate it with a staleness-based memory leak
detector and happens-before data race detector, so they
can report context-sensitive program locations that
cause bugs. We show that the CCU-based approach, in
concert with an extended GC, provides a compelling
alternative to CCT-based approaches for adding context
sensitivity to dynamic analyses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ureche:2013:MIS,
author = "Vlad Ureche and Cristian Talau and Martin Odersky",
title = "Miniboxing: improving the speed to code size tradeoff
in parametric polymorphism translations",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "73--92",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509537",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Parametric polymorphism enables code reuse and type
safety. Underneath the uniform interface exposed to
programmers, however, its low level implementation has
to cope with inherently non-uniform data: value types
of different sizes and semantics (bytes, integers,
floating point numbers) and reference types (pointers
to heap objects). On the Java Virtual Machine,
parametric polymorphism is currently translated to
bytecode using two competing approaches: homogeneous
and heterogeneous. Homogeneous translation requires
boxing, and thus introduces indirect access delays.
Heterogeneous translation duplicates and adapts code
for each value type individually, producing more
bytecode. Therefore, bytecode speed and size are at odds
with each other. This paper proposes a novel
translation that significantly reduces the bytecode
size without affecting the execution speed. The key
insight is that larger value types (such as integers)
can hold smaller ones (such as bytes) thus reducing the
duplication necessary in heterogeneous translations. In
our implementation, on the Scala compiler, we encode
all primitive value types in long integers. The
resulting bytecode approaches the performance of
monomorphic code, matches the performance of the
heterogeneous translation and obtains speedups of up to
22x over the homogeneous translation, all with modest
increases in size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shahriyar:2013:TGR,
author = "Rifat Shahriyar and Stephen Michael Blackburn and Xi
Yang and Kathryn S. McKinley",
title = "Taking off the gloves with reference counting
{Immix}",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "93--110",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509527",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Despite some clear advantages and recent advances,
reference counting remains a poor cousin to
high-performance tracing garbage collectors. The
advantages of reference counting include (a) immediacy
of reclamation, (b) incrementality, and (c) local scope
of its operations. After decades of languishing with
hopelessly bad performance, recent work narrowed the
gap between reference counting and the fastest tracing
collectors to within 10\%. Though a major advance, this
gap remains a substantial barrier to adoption in
performance-conscious application domains. Our work
identifies heap organization as the principal source of
the remaining performance gap. We present the design,
implementation, and analysis of a new collector, RC
Immix, that replaces reference counting's traditional
free-list heap organization with the line and block
heap structure introduced by the Immix collector. The
key innovations of RC Immix are (1) to combine
traditional reference counts with per-line live object
counts to identify reusable memory and (2) to eliminate
fragmentation by integrating copying with reference
counting of new objects and with backup tracing cycle
collection. In RC Immix, reference counting offers
efficient collection and the line and block heap
organization delivers excellent mutator locality and
efficient allocation. With these advances, RC Immix
closes the 10\% performance gap, matching the
performance of a highly tuned production generational
collector. By removing the performance barrier, this
work transforms reference counting into a serious
alternative for meeting high performance objectives for
garbage collected languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Xu:2013:RTO,
author = "Guoqing Xu",
title = "{Resurrector}: a tunable object lifetime profiling
technique for optimizing real-world programs",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "111--130",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509512",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Modern object-oriented applications commonly suffer
from severe performance problems that need to be
optimized away for increased efficiency and user
satisfaction. Many existing optimization techniques
(such as object pooling and pretenuring) require
precise identification of object lifetimes. However, it
is particularly challenging to obtain object lifetimes
both precisely and efficiently: precise profiling
techniques such as Merlin introduce several hundred
times slowdown even for small programs while efficient
approximation techniques often sacrifice precision and
produce less useful lifetime information. This paper
presents a tunable profiling technique, called
Resurrector, that explores the middle ground between
high precision and high efficiency to find the
precision-efficiency sweetspot for various
liveness-based optimization techniques. Our evaluation
shows that Resurrector is both more precise and more
efficient than the GC-based approximation, and it is
orders-of-magnitude faster than Merlin. To demonstrate
Resurrector's usefulness, we have developed client
analyses to find allocation sites that create large
data structures with disjoint lifetimes. By inspecting
program source code and reusing data structures created
from these allocation sites, we have achieved
significant performance gains. We have also improved
the precision of an existing optimization technique
using the lifetime information collected by
Resurrector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Norris:2013:CCC,
author = "Brian Norris and Brian Demsky",
title = "{CDSChecker}: checking concurrent data structures
written with {C\slash C++} atomics",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "131--150",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509514",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Writing low-level concurrent software has
traditionally required intimate knowledge of the entire
toolchain and often has involved coding in assembly.
New language standards have extended C and C++ with
support for low-level atomic operations and a weak
memory model, enabling developers to write portable and
efficient multithreaded code. Developing correct
low-level concurrent code is well-known to be
especially difficult under a weak memory model, where
code behavior can be surprising. Building reliable
concurrent software using C/C++ low-level atomic
operations will likely require tools that help
developers discover unexpected program behaviors. In
this paper we present CDSChecker, a tool for
exhaustively exploring the behaviors of concurrent code
under the C/C++ memory model. We develop several novel
techniques for modeling the relaxed behaviors allowed
by the memory model and for minimizing the number of
execution behaviors that CDSChecker must explore. We
have used CDSChecker to exhaustively unit test several
concurrent data structure implementations on specific
inputs and have discovered errors in both a recently
published C11 implementation of a work-stealing queue
and a single producer, single consumer queue
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Raychev:2013:ERD,
author = "Veselin Raychev and Martin Vechev and Manu Sridharan",
title = "Effective race detection for event-driven programs",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "151--166",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509538",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Like shared-memory multi-threaded programs,
event-driven programs such as client-side web
applications are susceptible to data races that are
hard to reproduce and debug. Race detection for such
programs is hampered by their pervasive use of ad hoc
synchronization, which can lead to a prohibitive number
of false positives. Race detection also faces a
scalability challenge, as a large number of
short-running event handlers can quickly overwhelm
standard vector-clock-based techniques. This paper
presents several novel contributions that address both
of these challenges. First, we introduce race coverage,
a systematic method for exposing ad hoc synchronization
and other (potentially harmful) races to the user,
significantly reducing false positives. Second, we
present an efficient connectivity algorithm for
computing race coverage. The algorithm is based on
chain decomposition and leverages the structure of
event-driven programs to dramatically decrease the
overhead of vector clocks. We implemented our
techniques in a tool called EventRacer and evaluated it
on a number of public web sites. The results indicate
substantial performance and precision improvements of
our approach over the state-of-the-art. Using
EventRacer, we found many harmful races, most of which
are beyond the reach of current techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bolz:2013:SSC,
author = "Carl Friedrich Bolz and Lukas Diekmann and Laurence
Tratt",
title = "Storage strategies for collections in dynamically
typed languages",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "167--182",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Dynamically typed language implementations often use
more memory and execute slower than their statically
typed cousins, in part because operations on
collections of elements are unoptimised. This paper
describes storage strategies, which dynamically
optimise collections whose elements are instances of
the same primitive type. We implement storage
strategies in the PyPy virtual machine, giving a
performance increase of 18\% on wide-ranging benchmarks
of real Python programs. We show that storage
strategies are simple to implement, needing only
1500 LoC in PyPy, and have applicability to a wide range
of virtual machines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Miller:2013:IPG,
author = "Heather Miller and Philipp Haller and Eugene Burmako
and Martin Odersky",
title = "Instant pickles: generating object-oriented pickler
combinators for fast and extensible serialization",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "183--202",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509547",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "As more applications migrate to the cloud, and as
``big data'' edges into even more production
environments, the performance and simplicity of
exchanging data between compute nodes/devices are
increasing in importance. An issue central to
distributed programming, yet often under-considered, is
serialization or pickling, i.e., persisting runtime
objects by converting them into a binary or text
representation. Pickler combinators are a popular
approach from functional programming; their
composability alleviates some of the tedium of writing
pickling code by hand, but they don't translate well to
object-oriented programming due to qualities like open
class hierarchies and subtyping polymorphism.
Furthermore, both functional pickler combinators and
popular, Java-based serialization frameworks tend to be
tied to a specific pickle format, leaving programmers
with no choice of how their data is persisted. In this
paper, we present object-oriented pickler combinators
and a framework for generating them at compile-time,
called scala/pickling, designed to be the default
serialization mechanism of the Scala programming
language. The static generation of OO picklers enables
significant performance improvements, outperforming
Java and Kryo in most of our benchmarks. In addition to
high performance and the need for little to no
boilerplate, our framework is extensible: using the
type class pattern, users can provide both (1) custom,
easily interchangeable pickle formats and (2) custom
picklers, to override the default behavior of the
pickling framework. In benchmarks, we compare
scala/pickling with other popular industrial
frameworks, and present results on time, memory usage,
and size when pickling/unpickling a number of data
types used in real-world, large-scale distributed
applications and frameworks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Salkeld:2013:IDO,
author = "Robin Salkeld and Gregor Kiczales",
title = "Interacting with dead objects",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "203--216",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509543",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Debugging and analyzing a snapshot of a crashed
program's memory is far more difficult than working
with a live program, because debuggers can no longer
execute code to help make sense of the program state.
We present an architecture that supports the restricted
execution of ordinary code starting from the snapshot,
as if the dead objects within it had been restored, but
without access to their original external environment.
We demonstrate the feasibility of this approach via an
implementation for Java that does not require a custom
virtual machine, show that it performs competitively
with live execution, and use it to diagnose an
unresolved memory leak in a mature mainstream
application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Politz:2013:PFM,
author = "Joe Gibbs Politz and Alejandro Martinez and Matthew
Milano and Sumner Warren and Daniel Patterson and
Junsong Li and Anand Chitipothu and Shriram
Krishnamurthi",
title = "{Python}: the full monty",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "217--232",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509536",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "We present a small-step operational semantics for the
Python programming language. We present both a core
language for Python, suitable for tools and proofs, and
a translation process for converting Python source to
this core. We have tested the composition of
translation and evaluation of the core for conformance
with the primary Python implementation, thereby giving
confidence in the fidelity of the semantics. We briefly
report on the engineering of these components. Finally,
we examine subtle aspects of the language, identifying
scope as a pervasive concern that even impacts features
that might be considered orthogonal.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gerakios:2013:FIS,
author = "Prodromos Gerakios and Aggelos Biboudis and Yannis
Smaragdakis",
title = "Forsaking inheritance: supercharged delegation in
{DelphJ}",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "233--252",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509535",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "We propose DelphJ: a Java-based OO language that
eschews inheritance completely, in favor of a
combination of class morphing and (deep) delegation.
Compared to past delegation approaches, the novel
aspect of our design is the ability to emulate the best
aspects of inheritance while retaining maximum
flexibility: using morphing, a class can select any of
the methods of its delegatee and export them (if
desired) or transform them (e.g., to add extra
arguments or modify type signatures), yet without
needing to name these methods explicitly and handle
them one-by-one. Compared to past work on morphing, our
approach adopts and adapts advanced delegation
mechanisms, in order to add late binding capabilities
and, thus, provide a full substitute of inheritance.
Additionally, we explore complex semantic issues in the
interaction of delegation with late binding. We present
our language design both informally, with numerous
examples, and formally in a core calculus.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Smaragdakis:2013:SBP,
author = "Yannis Smaragdakis and George Balatsouras and George
Kastrinis",
title = "Set-based pre-processing for points-to analysis",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "253--270",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509524",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "We present set-based pre-analysis: a virtually
universal optimization technique for flow-insensitive
points-to analysis. Points-to analysis computes a
static abstraction of how object values flow through a
program's variables. Set-based pre-analysis relies on
the observation that much of this reasoning can take
place at the set level rather than the value level.
Computing constraints at the set level results in
significant optimization opportunities: we can rewrite
the input program into a simplified form with the same
essential points-to properties. This rewrite results in
removing both local variables and instructions, thus
simplifying the subsequent value-based points-to
computation. Effectively, set-based pre-analysis puts
the program in a normal form optimized for points-to
analysis. Compared to other techniques for off-line
optimization of points-to analyses in the literature,
the new elements of our approach are the ability to
eliminate statements, and not just variables, as well
as its modularity: set-based pre-analysis can be
performed on the input just once, e.g., allowing the
pre-optimization of libraries that are subsequently
reused many times and for different analyses. In
experiments with Java programs, set-based pre-analysis
eliminates 30\% of the program's local variables and
30\% or more of computed context-sensitive points-to
facts, over a wide set of benchmarks and analyses,
resulting in an approximately 20\% average speedup (max: 110\%,
median: 18\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tetali:2013:MSA,
author = "Sai Deep Tetali and Mohsen Lesani and Rupak Majumdar
and Todd Millstein",
title = "{MrCrypt}: static analysis for secure cloud
computations",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "271--286",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509554",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "In a common use case for cloud computing, clients
upload data and computation to servers that are managed
by a third-party infrastructure provider. We describe
MrCrypt, a system that provides data confidentiality in
this setting by executing client computations on
encrypted data. MrCrypt statically analyzes a program
to identify the set of operations on each input data
column, in order to select an appropriate homomorphic
encryption scheme for that column, and then transforms
the program to operate over encrypted data. The
encrypted data and transformed program are uploaded to
the server and executed as usual, and the result of the
computation is decrypted on the client side. We have
implemented MrCrypt for Java and illustrate its
practicality on three standard benchmark suites for the
Hadoop MapReduce framework. We have also formalized the
approach and proven several soundness and security
guarantees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{DeLozier:2013:ICL,
author = "Christian DeLozier and Richard Eisenberg and Santosh
Nagarakatte and Peter-Michael Osera and Milo M. K.
Martin and Steve Zdancewic",
title = "{Ironclad C++}: a library-augmented type-safe subset
of {C++}",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "287--304",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509550",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "The C++ programming language remains widely used,
despite inheriting many unsafe features from
C---features that often lead to failures of type or
memory safety that manifest as buffer overflows,
use-after-free vulnerabilities, or abstraction
violations. Malicious attackers can exploit such
violations to compromise application and system
security. This paper introduces Ironclad C++, an
approach to bringing the benefits of type and memory
safety to C++. Ironclad C++ is, in essence, a
library-augmented, type-safe subset of C++. All
Ironclad C++ programs are valid C++ programs that can
be compiled using standard, off-the-shelf C++
compilers. However, not all valid C++ programs are
valid Ironclad C++ programs: a syntactic source-code
validator statically prevents the use of unsafe C++
features. To enforce safety properties that are
difficult to check statically, Ironclad C++ applies
dynamic checks via templated ``smart pointer'' classes.
Using a semi-automatic refactoring tool, we have ported
nearly 50K lines of code to Ironclad C++. These
benchmarks incur a performance overhead of 12\% on
average, compared to the original unsafe C++ code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Richards:2013:FAC,
author = "Gregor Richards and Christian Hammer and Francesco
Zappa Nardelli and Suresh Jagannathan and Jan Vitek",
title = "Flexible access control for {JavaScript}",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "305--322",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509542",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Providing security guarantees for systems built out of
untrusted components requires the ability to define and
enforce access control policies over untrusted code. In
Web 2.0 applications, JavaScript code from different
origins is often combined on a single page, leading to
well-known vulnerabilities. We present a security
infrastructure which allows users and content providers
to specify access control policies over subsets of a
JavaScript program by leveraging the concept of
delimited histories with revocation. We implement our
proposal in WebKit and evaluate it with three policies
on 50 widely used websites with no changes to their
JavaScript code and report performance overheads and
violations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Feldthaus:2013:SAR,
author = "Asger Feldthaus and Anders M{\o}ller",
title = "Semi-automatic rename refactoring for {JavaScript}",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "323--338",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509520",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Modern IDEs support automated refactoring for many
programming languages, but support for JavaScript is
still primitive. To perform renaming, which is one of
the fundamental refactorings, there is often no
practical alternative to simple syntactic
search-and-replace. Although more sophisticated
alternatives have been developed, they are limited by
whole-program assumptions and poor scalability. We
propose a technique for semi-automatic refactoring for
JavaScript, with a focus on renaming. Unlike
traditional refactoring algorithms, semi-automatic
refactoring works by a combination of static analysis
and interaction with the programmer. With this
pragmatic approach, we can provide scalable and
effective refactoring support for real-world code,
including libraries and incomplete applications.
Through a series of experiments that estimate how much
manual effort our technique demands from the
programmer, we show that our approach is a useful
improvement compared to search-and-replace tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Raychev:2013:RS,
author = "Veselin Raychev and Max Sch{\"a}fer and Manu Sridharan
and Martin Vechev",
title = "Refactoring with synthesis",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "339--354",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509544",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Refactoring has become an integral part of modern
software development, with wide support in popular
integrated development environments (IDEs). Modern IDEs
provide a fixed set of supported refactorings, listed
in a refactoring menu. But with IDEs supporting more
and more refactorings, it is becoming increasingly
difficult for programmers to discover and memorize all
their names and meanings. Also, since the set of
refactorings is hard-coded, if a programmer wants to
achieve a slightly different code transformation, she
has to either apply a (possibly non-obvious) sequence
of several built-in refactorings, or just perform the
transformation by hand. We propose a novel approach to
refactoring, based on synthesis from examples, which
addresses these limitations. With our system, the
programmer need not worry how to invoke individual
refactorings or the order in which to apply them.
Instead, a transformation is achieved via three simple
steps: the programmer first indicates the start of a
code refactoring phase; then she performs some of the
desired code changes manually; and finally, she asks
the tool to complete the refactoring. Our system
completes the refactoring by first extracting the
difference between the starting program and the
modified version, and then synthesizing a sequence of
refactorings that achieves (at least) the desired
changes. To enable scalable synthesis, we introduce
local refactorings, which allow for first discovering a
refactoring sequence on small program fragments and
then extrapolating it to a full refactoring sequence.
We implemented our approach as an Eclipse plug-in, with
an architecture that is easily extendible with new
refactorings. The experimental results are encouraging:
with only minimal user input, the synthesizer was able
to quickly discover complex refactoring sequences for
several challenging realistic examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bois:2013:BGV,
author = "Kristof {Du Bois} and Jennifer B. Sartor and Stijn
Eyerman and Lieven Eeckhout",
title = "Bottle graphs: visualizing scalability bottlenecks in
multi-threaded applications",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "355--372",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509529",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Understanding and analyzing multi-threaded program
performance and scalability is far from trivial, which
severely complicates parallel software development and
optimization. In this paper, we present bottle graphs,
a powerful analysis tool that visualizes multi-threaded
program performance, with regard to both per-thread
parallelism and execution time. Each thread is
represented as a box, with its height equal to the
share of that thread in the total program execution
time, its width equal to its parallelism, and its area
equal to its total running time. The boxes of all
threads are stacked upon each other, leading to a stack
with height equal to the total program execution time.
Bottle graphs show exactly how scalable each thread is,
and thus guide optimization towards those threads that
have a smaller parallel component (narrower), and a
larger share of the total execution time (taller), i.e.
to the 'neck' of the bottle. Using light-weight OS
modules, we calculate bottle graphs for unmodified
multi-threaded programs running on real processors with
an average overhead of 0.68\%. To demonstrate their
utility, we do an extensive analysis of 12 Java
benchmarks running on top of the Jikes JVM, which
introduces many JVM service threads. We not only reveal
and explain scalability limitations of several
well-known Java benchmarks; we also analyze the reasons
why the garbage collector itself does not scale, and in
fact performs optimally with two collector threads for
all benchmarks, regardless of the number of application
threads. Finally, we compare the scalability of Jikes
versus the OpenJDK JVM. We demonstrate how useful and
intuitive bottle graphs are as a tool to analyze
scalability and help optimize multi-threaded
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{DElia:2013:BLP,
author = "Daniele Cono D'Elia and Camil Demetrescu",
title = "{Ball--Larus} path profiling across multiple loop
iterations",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "373--390",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509521",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Identifying the hottest paths in the control flow
graph of a routine can direct optimizations to portions
of the code where most resources are consumed. This
powerful methodology, called path profiling, was
introduced by Ball and Larus in the mid 90's [4] and
has received considerable attention in the last 15
years for its practical relevance. A shortcoming of the
Ball-Larus technique was the inability to profile
cyclic paths, making it difficult to mine execution
patterns that span multiple loop iterations. Previous
results, based on rather complex algorithms, have
attempted to circumvent this limitation at the price of
significant performance losses even for a small number
of iterations. In this paper, we present a new approach
to multi-iteration path profiling, based on data
structures built on top of the original Ball-Larus
numbering technique. Our approach allows the profiling
of all executed paths obtained as a concatenation of up
to k Ball-Larus acyclic paths, where k is a
user-defined parameter. We provide examples showing
that this method can reveal optimization opportunities
that acyclic-path profiling would miss. An extensive
experimental investigation on a large variety of Java
benchmarks on the Jikes RVM shows that our approach can
be even faster than Ball-Larus due to fewer operations
on smaller hash tables, producing compact
representations of cyclic paths even for large values
of k.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sharma:2013:DDE,
author = "Rahul Sharma and Eric Schkufza and Berkeley Churchill
and Alex Aiken",
title = "Data-driven equivalence checking",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "391--406",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509509",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "We present a data driven algorithm for equivalence
checking of two loops. The algorithm infers simulation
relations using data from test runs. Once a candidate
simulation relation has been obtained, off-the-shelf
SMT solvers are used to check whether the simulation
relation actually holds. The algorithm is sound:
insufficient data will cause the proof to fail. We
demonstrate a prototype implementation, called DDEC, of
our algorithm, which is the first sound equivalence
checker for loops written in x86 assembly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kneuss:2013:SMR,
author = "Etienne Kneuss and Ivan Kuraj and Viktor Kuncak and
Philippe Suter",
title = "Synthesis modulo recursive functions",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "407--426",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509555",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "We describe techniques for synthesis and verification
of recursive functional programs over unbounded
domains. Our techniques build on top of an algorithm
for satisfiability modulo recursive functions, a
framework for deductive synthesis, and complete
synthesis procedures for algebraic data types. We
present new counterexample-guided algorithms for
constructing verified programs. We have implemented
these algorithms in an integrated environment for
interactive verification and synthesis from relational
specifications. Our system was able to synthesize a
number of useful recursive functions that manipulate
unbounded numbers and data structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Shi:2013:COU,
author = "Yao Shi and Bernard Blackham and Gernot Heiser",
title = "Code optimizations using formally verified
properties",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "427--442",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509513",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Formal program verification offers strong assurance of
correctness, backed by the strength of mathematical
proof. Constructing these proofs requires humans to
identify program invariants, and show that they are
always maintained. These invariants are then used to
prove that the code adheres to its specification. In
this paper, we explore the overlap between formal
verification and code optimization. We propose two
approaches to reuse the invariants derived in formal
proofs and integrate them into compilation. The first
applies invariants extracted from the proof, while the
second leverages the property of program safety (i.e.,
the absence of bugs). We reuse this information to
improve the performance of generated object code. We
evaluated these methods on seL4, a real-world
formally-verified microkernel, and obtained
improvements in average runtime performance (up to
28\%) and in worst-case execution time (up to 25\%). In
macro-benchmarks, we found the performance of
para-virtualized Linux running on the microkernel
improved by 6--16\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dillig:2013:IIG,
author = "Isil Dillig and Thomas Dillig and Boyang Li and Ken
McMillan",
title = "Inductive invariant generation via abductive
inference",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "443--456",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509511",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "This paper presents a new method for generating
inductive loop invariants that are expressible as
boolean combinations of linear integer constraints. The
key idea underlying our technique is to perform a
backtracking search that combines Hoare-style
verification condition generation with a logical
abduction procedure based on quantifier elimination to
speculate candidate invariants. Starting with true, our
method iteratively strengthens loop invariants until
they are inductive and strong enough to verify the
program. A key feature of our technique is that it is
lazy: It only infers those invariants that are
necessary for verifying program correctness.
Furthermore, our technique can infer arbitrary boolean
combinations (including disjunctions) of linear
invariants. We have implemented the proposed approach
in a tool called HOLA. Our experiments demonstrate that
HOLA can infer interesting invariants that are beyond
the reach of existing state-of-the-art invariant
generation tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hoppe:2013:DDB,
author = "Michael Hoppe and Stefan Hanenberg",
title = "Do developers benefit from generic types?: an
empirical comparison of generic and raw types in
{Java}",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "457--474",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509528",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Type systems that permit developers to express
themselves more precisely are one of the primary topics
in programming language research, as well as in
industrial software development. While it seems
plausible that an expressive static type system
increases developer productivity, there is little
empirical evidence for or against this hypothesis.
Generic types in Java are an example: as an extension
of Java's original type system, some claim that Java
1.5 improves the type system's ``expressiveness.'' Even
if this claim is true, there exists little empirical
evidence that claimed expressiveness leads to a
measurable increase in developer productivity. This
paper introduces an experiment where generic types (in
comparison to raw types) have been evaluated in three
different directions: (1) the documentation impact on
undocumented APIs, (2) the time required for fixing
type errors, and (3) the extensibility of a generic
type hierarchy. The results of the experiment suggest
that generic types improve documentation and reduce
extensibility --- without revealing a difference in the
time required for fixing type errors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dimoulas:2013:OC,
author = "Christos Dimoulas and Robert Bruce Findler and
Matthias Felleisen",
title = "Option contracts",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "475--494",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509548",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Many languages support behavioral software contracts
so that programmers can describe a component's
obligations and promises via logical assertions in its
interface. The contract system monitors program
execution, checks whether the assertions hold, and, if
not, blames the guilty component. Pinning down the
violator gets the debugging process started in the
right direction. Quality contracts impose a serious
run-time cost, however, and programmers therefore
compromise in many ways. Some turn off contracts for
deployment, but then contracts and code quickly get out
of sync during maintenance. Others test contracts
randomly or probabilistically. In all cases,
programmers have to cope with a lack of blame information
when the program eventually fails. In response, we
propose option contracts as an addition to the contract
tool box. Our key insight is that in ordinary contract
systems, server components impose their contract on
client components, giving them no choice whether to
trust the server's promises or check them. With option
contracts, server components may choose to tag a
contract as an option and clients may choose to
exercise the option or accept it, in which case they
also shoulder some responsibility. We show that option
contracts permit programmers to specify flexible
checking policies, that their cost is reasonable, and
that they satisfy a complete monitoring theorem.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Treichler:2013:LSD,
author = "Sean Treichler and Michael Bauer and Alex Aiken",
title = "Language support for dynamic, hierarchical data
partitioning",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "495--514",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509545",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Applications written for distributed-memory parallel
architectures must partition their data to enable
parallel execution. As memory hierarchies become
deeper, it is increasingly necessary that the data
partitioning also be hierarchical to match. Current
language proposals perform this hierarchical
partitioning statically, which excludes many important
applications where the appropriate partitioning is
itself data dependent and so must be computed
dynamically. We describe Legion, a region-based
programming system, where each region may be
partitioned into subregions. Partitions are computed
dynamically and are fully programmable. The division of
data need not be disjoint: subregions of a region
may overlap, or alias, one another. Computations use
regions with certain privileges (e.g., expressing that
a computation uses a region read-only) and data
coherence (e.g., expressing that the computation need
only be atomic with respect to other operations on the
region), which can be controlled on a per-region (or
subregion) basis. We present the novel aspects of the
Legion design, in particular the combination of static
and dynamic checks used to enforce soundness. We give
an extended example illustrating how Legion can express
computations with dynamically determined relationships
between computations and data partitions. We prove the
soundness of Legion's type system, and show that Legion
type checking improves performance by up to 71\% by eliding
provably safe memory checks. In particular, we show
that the dynamic checks to detect aliasing at runtime
at the region granularity have negligible overhead. We
report results for three real-world applications
running on distributed memory machines, achieving up to
62.5X speedup on 96 GPUs on the Keeneland
supercomputer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Balatsouras:2013:CHC,
author = "George Balatsouras and Yannis Smaragdakis",
title = "Class hierarchy complementation: soundly completing a
partial type graph",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "515--532",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509530",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "We present the problem of class hierarchy
complementation: given a partially known hierarchy of
classes together with subtyping constraints (``A has to
be a transitive subtype of B'') complete the hierarchy
so that it satisfies all constraints. The problem has
immediate practical application to the analysis of
partial programs---e.g., it arises in the process of
providing a sound handling of ``phantom classes'' in
the Soot program analysis framework. We provide
algorithms to solve the hierarchy complementation
problem in the single inheritance and multiple
inheritance settings. We also show that the problem in
a language such as Java, with single inheritance but
multiple subtyping and distinguished class vs.
interface types, can be decomposed into separate
single- and multiple-subtyping instances. We implement
our algorithms in a tool, JPhantom, which complements
partial Java bytecode programs so that the result is
guaranteed to satisfy the Java verifier requirements.
JPhantom is highly scalable and runs in mere seconds
even for large input applications and complex
constraints (with a maximum of 14s for a 19MB
binary).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ravichandran:2013:MES,
author = "Kaushik Ravichandran and Santosh Pande",
title = "Multiverse: efficiently supporting distributed
high-level speculation",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "533--552",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509525",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Algorithmic speculation or high-level speculation is a
promising programming paradigm which allows programmers
to speculatively branch an execution into multiple
independent parallel sections and then choose the best
(perhaps fastest) amongst them. The continuing
execution after the speculatively branched section sees
only the modifications made by the best one. This
programming paradigm allows programmers to harness
parallelism and can provide dramatic performance
improvements. In this paper we present the Multiverse
speculative programming model. Multiverse allows
programmers to exploit parallelism through high-level
speculation. It can effectively harness large amounts
of parallelism by speculating across an entire cluster
and is not bound by the parallelism available in a
single machine. We present abstractions and a runtime
which allow programmers to introduce large scale
high-level speculative parallelism into applications
with minimal effort. We introduce a novel on-demand
address space sharing mechanism which provides
speculations efficient, transparent access to the
original address space of the application (including
the use of pointers) across machine boundaries.
Multiverse provides single commit semantics across
speculations while guaranteeing isolation between them.
We also introduce novel mechanisms to deal with
scalability bottlenecks when there are a large number
of speculations. We demonstrate that for several
benchmarks, Multiverse achieves impressive speedups and
good scalability across entire clusters. We study the
overheads of the runtime and demonstrate how our
special scalability mechanisms are crucial in scaling
cluster-wide.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Clebsch:2013:FCG,
author = "Sylvan Clebsch and Sophia Drossopoulou",
title = "Fully concurrent garbage collection of actors on
many-core machines",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "553--570",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509557",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Disposal of dead actors in actor-model languages is as
important as disposal of unreachable objects in
object-oriented languages. In current practice,
programmers are required to either manually terminate
actors, or they have to rely on garbage collection
systems that monitor actor mutation through write
barriers, thread coordination through locks etc. These
techniques, however, prevent the collector from being
fully concurrent. We developed a protocol that allows
garbage collection to run fully concurrently with all
actors. The main challenge in concurrent garbage
collection is the detection of cycles of sleeping
actors in the actor graph, in the presence of
concurrent mutation of this graph. Our protocol is
solely built on message passing: it uses deferred
direct reference counting, a dedicated actor for the
detection of (cyclic) garbage, and a confirmation
protocol (to deal with the mutation of the actor
graph). We present our ideas informally through an
example, and then present a formal model, prove
soundness and argue completeness. We have implemented
the protocol as part of a runtime library. As a
preliminary performance evaluation, we discuss the
performance of our approach as currently used at a
financial institution, and use four benchmarks from the
literature to compare our approach with other
actor-model systems. These preliminary results indicate
that the overhead of our approach is small.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhao:2013:INT,
author = "Jisheng Zhao and Roberto Lublinerman and Zoran
Budimli{\'c} and Swarat Chaudhuri and Vivek Sarkar",
title = "Isolation for nested task parallelism",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "571--588",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Isolation--the property that a task can access shared
data without interference from other tasks--is one of
the most basic concerns in parallel programming.
While there is a large body of past work on isolated
task-parallelism, the integration of isolation,
task-parallelism, and nesting of tasks has been a
difficult and unresolved challenge. In this paper, we
present a programming and execution model called Otello
where isolation is extended to arbitrarily nested
parallel tasks with irregular accesses to heap data. At
the same time, no additional burden is imposed on the
programmer, who only exposes parallelism by creating
and synchronizing parallel tasks, leaving the job of
ensuring isolation to the underlying compiler and
runtime system. Otello extends our past work on the Aida
execution model and the delegated isolation mechanism
[22] to the setting of nested parallelism. The basic
runtime construct in Aida and Otello is an assembly: a
task equipped with a region in the shared heap that it
owns. When an assembly A conflicts with an assembly B,
A transfers--or delegates--its code and owned region to
a carefully selected assembly C in a way that will
ensure isolation with B, leaving the responsibility of
re-executing task A to C. The choice of C depends on
the nesting relationship between A and B. We have
implemented Otello on top of the Habanero Java (HJ)
parallel programming language [8], and used this
implementation to evaluate Otello on collections of
nested task-parallel benchmarks and non-nested
transactional benchmarks from past work. On the nested
task-parallel benchmarks, Otello achieves scalability
comparable to HJ programs without built-in isolation,
and the relative overhead of Otello is lower than that
of many published data-race detection algorithms that
detect the isolation violations (but do not enforce
isolation). For the transactional benchmarks, Otello
incurs lower overhead than a state-of-the-art software
transactional memory system (Deuce STM).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Tripp:2013:TNP,
author = "Omer Tripp and Eric Koskinen and Mooly Sagiv",
title = "Turning nondeterminism into parallelism",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "589--604",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509533",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Nondeterminism is a useful and prevalent concept in
the design and implementation of software systems. An
important property of nondeterminism is its latent
parallelism: A nondeterministic action can evaluate to
multiple behaviors. If at least one of these behaviors
does not conflict with concurrent tasks, then there is
an admissible execution of the action in parallel with
these tasks. Unfortunately, existing implementations of
the atomic paradigm --- optimistic as well as
pessimistic --- are unable to fully exhaust the
parallelism potential of nondeterministic actions,
lacking the means to guide concurrent tasks toward
nondeterministic choices that minimize interference.
This paper investigates the problem of utilizing
parallelism due to nondeterminism. We observe that
nondeterminism occurs in many real-world codes. We
motivate the need for devising coordination mechanisms
that can utilize available nondeterminism. We have
developed a system featuring such mechanisms, which
leverages nondeterminism in a wide class of query
operations, allowing a task to look into the future of
concurrent tasks that mutate the shared state during
query evaluation and to reduce conflicts accordingly. We
evaluate our system on a suite of 12 algorithmic
benchmarks of wide applicability, as well as an
industrial application. The results are encouraging.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chong:2013:BIS,
author = "Nathan Chong and Alastair F. Donaldson and Paul H. J.
Kelly and Jeroen Ketema and Shaz Qadeer",
title = "Barrier invariants: a shared state abstraction for the
analysis of data-dependent {GPU} kernels",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "605--622",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509517",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Data-dependent GPU kernels, whose data or control flow
is dependent on the input of the program, are
difficult to verify because they require reasoning
about shared state manipulated by many parallel
threads. Existing verification techniques for GPU
kernels achieve soundness and scalability by using a
two-thread reduction and making the contents of the
shared state nondeterministic each time threads
synchronise at a barrier, to account for all possible
thread interactions. This coarse abstraction prohibits
verification of data-dependent kernels. We present
barrier invariants, a novel abstraction technique which
allows key properties about the shared state of a
kernel to be preserved across barriers during formal
reasoning. We have integrated barrier invariants with
the GPUVerify tool, and present a detailed case study
showing how they can be used to verify three prefix sum
algorithms, allowing efficient modular verification of
a stream compaction kernel, a key building block for
GPU programming. This analysis goes significantly
beyond what is possible using existing verification
techniques for GPU kernels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Choi:2013:GGT,
author = "Wontae Choi and George Necula and Koushik Sen",
title = "Guided {GUI} testing of {Android} apps with minimal
restart and approximate learning",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "623--640",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509552",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Smartphones and tablets with rich graphical user
interfaces (GUI) are becoming increasingly popular.
Hundreds of thousands of specialized applications,
called apps, are available for such mobile platforms.
Manual testing is the most popular technique for
testing graphical user interfaces of such apps. Manual
testing is often tedious and error-prone. In this
paper, we propose an automated technique, called
Swift-Hand, for generating sequences of test inputs for
Android apps. The technique uses machine learning to
learn a model of the app during testing, uses the
learned model to generate user inputs that visit
unexplored states of the app, and uses the execution of
the app on the generated inputs to refine the model. A
key feature of the testing algorithm is that it avoids
restarting the app, which is a significantly more
expensive operation than executing the app on a
sequence of inputs. An important insight behind our
testing algorithm is that we do not need to learn a
precise model of an app, which is often computationally
intensive, if our goal is to simply guide test
execution into unexplored parts of the state space. We
have implemented our testing algorithm in a publicly
available tool for Android apps written in Java. Our
experimental results show that we can achieve
significantly better coverage than traditional random
testing and L*-based testing in a given time budget.
Our algorithm also reaches peak coverage faster than
both random and L*-based testing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Azim:2013:TDF,
author = "Tanzirul Azim and Iulian Neamtiu",
title = "Targeted and depth-first exploration for systematic
testing of {Android} apps",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "641--660",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509549",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Systematic exploration of Android apps is an enabler
for a variety of app analysis and testing tasks.
Performing the exploration while apps run on actual
phones is essential for exploring the full range of app
capabilities. However, exploring real-world apps on
real phones is challenging due to non-determinism,
non-standard control flow, scalability and overhead
constraints. Relying on end-users to conduct the
exploration might not be very effective: we performed a
7-user study on popular Android apps, and found that the
combined 7-user coverage was 30.08\% of the app screens
and 6.46\% of the app methods. Prior approaches for
automated exploration of Android apps have run apps in
an emulator or focused on small apps whose source code
was available. To address these problems, we present
A$^3$E, an approach and tool that allows substantial
Android apps to be explored systematically while
running on actual phones, yet without requiring access
to the app's source code. The key insight of our
approach is to use a static, taint-style, dataflow
analysis on the app bytecode in a novel way, to
construct a high-level control flow graph that captures
legal transitions among activities (app screens). We
then use this graph to develop an exploration strategy
named Targeted Exploration that permits fast, direct
exploration of activities, including activities that
would be difficult to reach during normal use. We also
developed a strategy named Depth-first Exploration that
mimics user actions for exploring activities and their
constituents in a slower, but more systematic way. To
measure the effectiveness of our techniques, we use two
metrics: activity coverage (number of screens explored)
and method coverage. Experiments with using our
approach on 25 popular Android apps including BBC News,
Gas Buddy, Amazon Mobile, YouTube, Shazam Encore, and
CNN, show that our exploration techniques achieve
59.39--64.11\% activity coverage and 29.53--36.46\%
method coverage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kansal:2013:LAB,
author = "Aman Kansal and Scott Saponas and A. J. Bernheim Brush
and Kathryn S. McKinley and Todd Mytkowicz and Ryder
Ziola",
title = "The latency, accuracy, and battery {(LAB)}
abstraction: programmer productivity and energy
efficiency for continuous mobile context sensing",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "661--676",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509541",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Emerging mobile applications that sense context are
poised to delight and entertain us with timely news and
events, health tracking, and social connections.
Unfortunately, sensing algorithms quickly drain the
phone's battery. Developers can overcome battery drain
by carefully optimizing context sensing but that makes
programming with context arduous and ties applications
to current sensing hardware. These types of
applications embody a twist on the classic tension
between programmer productivity and performance due to
their combination of requirements. This paper
identifies the latency, accuracy, battery (LAB)
abstraction to resolve this tension. We implement and
evaluate LAB in a system called Senergy. Developers
specify their LAB requirements independent of inference
algorithms and sensors. Senergy delivers energy
efficient context while meeting the requirements and
adapts as hardware changes. We demonstrate LAB's
expressiveness by using it to implement 22 context
sensing algorithms for four types of context (location,
driving, walking, and stationary) and six diverse
applications. To demonstrate LAB's energy
optimizations, we show improvements in energy
efficiency on applications, often an order of magnitude,
compared to prior approaches. This relatively simple,
priority-based API may serve as a blueprint for future
API design in an increasingly complex design space that
must trade off latency, accuracy, and efficiency to meet
application needs and attain portability across
evolving, sensor-rich, heterogeneous, and power
constrained hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bergan:2013:ICS,
author = "Tom Bergan and Luis Ceze and Dan Grossman",
title = "Input-covering schedules for multithreaded programs",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "677--692",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509508",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "We propose constraining multithreaded execution to
small sets of input-covering schedules, which we define
as follows: given a program $P$, we say that a set of
schedules $ \Sigma $ covers all inputs of program $P$
if, when given any input, $P$'s execution can be
constrained to some schedule in $ \Sigma $ and still
produce a semantically valid result. Our approach is to
first compute a small $ \Sigma $ for a given program
$P$, and then, at runtime, constrain $P$'s execution to
always follow some schedule in $ \Sigma $, and never
deviate. We have designed an algorithm that uses
symbolic execution to systematically enumerate a set of
input-covering schedules, $ \Sigma $. To deal with
programs that run for an unbounded length of time, we
partition execution into bounded epochs, find
input-covering schedules for each epoch in isolation,
and then piece the schedules together at runtime. We
have implemented this algorithm along with a
constrained execution runtime for pthreads programs,
and we report results. Our approach has the following
advantage: because all possible runtime schedules are
known a priori, we can seek to validate the program by
thoroughly verifying each schedule in $ \Sigma $, in
isolation, without needing to reason about the huge
space of thread interleavings that arises due to
conventional nondeterministic execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bond:2013:OCC,
author = "Michael D. Bond and Milind Kulkarni and Man Cao and
Minjia Zhang and Meisam Fathi Salmi and Swarnendu
Biswas and Aritra Sengupta and Jipeng Huang",
title = "{OCTET}: capturing and controlling cross-thread
dependences efficiently",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "693--712",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509519",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Parallel programming is essential for reaping the
benefits of parallel hardware, but it is notoriously
difficult to develop and debug reliable, scalable
software systems. One key challenge is that modern
languages and systems provide poor support for ensuring
concurrency correctness properties --- atomicity,
sequential consistency, and multithreaded determinism
--- because all existing approaches are impractical.
Dynamic, software-based approaches slow programs by up
to an order of magnitude because capturing and
controlling cross-thread dependences (i.e., conflicting
accesses to shared memory) requires synchronization at
virtually every access to potentially shared memory.
This paper introduces a new software-based concurrency
control mechanism called OCTET that soundly captures
cross-thread dependences and can be used to build
dynamic analyses for concurrency correctness. OCTET
achieves low overheads by tracking the locality state
of each potentially shared object. Non-conflicting
accesses conform to the locality state and require no
synchronization; only conflicting accesses require a
state change and heavyweight synchronization. This
optimistic tradeoff leads to significant efficiency
gains in capturing cross-thread dependences: a
prototype implementation of OCTET in a high-performance
Java virtual machine slows real-world concurrent
programs by only 26\% on average. A dependence
recorder, suitable for record {\&} replay, built on top
of OCTET adds an additional 5\% overhead on average.
These results suggest that OCTET can provide a
foundation for developing low-overhead analyses that
check and enforce concurrency correctness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Noll:2013:OFD,
author = "Albert Noll and Thomas Gross",
title = "Online feedback-directed optimizations for parallel
{Java} code",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "713--728",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "The performance of parallel code significantly depends
on the parallel task granularity (PTG). If the PTG is
too coarse, performance suffers due to load imbalance;
if the PTG is too fine, performance suffers from the
overhead that is induced by parallel task creation and
scheduling. This paper presents a software platform
that automatically determines the PTG at run-time.
Automatic PTG selection is enabled by concurrent calls,
which are special source language constructs that
provide a late decision (at run-time) of whether
concurrent calls are executed sequentially or
concurrently (as a parallel task). Furthermore, the
execution semantics of concurrent calls permits the
runtime system to merge two (or more) concurrent calls
thereby coarsening the PTG. We present an integration
of concurrent calls into the Java programming language
and the Java Memory Model, and show how the Java Virtual
Machine can adapt the PTG based on dynamic profiling.
The performance evaluation shows that our runtime
system performs competitively with Java programs for
which the PTG is tuned manually. Compared to an
unfortunate choice of the PTG, this approach performs
up to 3x faster than standard Java code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Herhut:2013:RTP,
author = "Stephan Herhut and Richard L. Hudson and Tatiana
Shpeisman and Jaswanth Sreeram",
title = "{River Trail}: a path to parallelism in {JavaScript}",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "729--744",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509516",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "JavaScript is the most popular language on the web and
is a crucial component of HTML5 applications and
services that run on consumer platforms ranging from
desktops to phones. However, despite the ample amount of
hardware parallelism available to web applications on
such platforms, JavaScript web applications remain
predominantly sequential. Common parallel programming
solutions accepted by other programming languages have
failed to transfer to JavaScript due to
differences in programming models, the additional
requirements of the web and different developer
expectations. In this paper we present River Trail ---
a parallel programming model and API for JavaScript
that provides safe, portable, programmer-friendly,
deterministic parallelism to JavaScript applications.
River Trail allows web applications to effectively
utilize multiple cores, vector instructions, and GPUs
on client platforms while allowing the web developer to
remain within the environment of JavaScript. We
describe the implementation of the River Trail compiler
and runtime and present experimental results that show
the impact of River Trail on performance and
scalability for a variety of realistic HTML5
applications. Our experiments show that River Trail has
a dramatic positive impact on overall performance and
responsiveness of computationally intense JavaScript
based applications achieving up to 33.6 times speedup
for kernels and up to 11.8 times speedup for realistic
web applications compared to sequential JavaScript.
Moreover, River Trail enables new interactive web
usages that are simply not even possible with standard
sequential JavaScript.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bhattacharya:2013:CCI,
author = "Suparna Bhattacharya and Kanchi Gopinath and Mangala
Gowri Nanda",
title = "Combining concern input with program analysis for
bloat detection",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "745--764",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Framework based software tends to get bloated by
accumulating optional features (or concerns)
just in case they are needed. The good news is that
such feature bloat need not always cause runtime
execution bloat. The bad news is that often enough,
only a few statements from an optional concern may
cause execution bloat that may result in as much as
50\% runtime overhead. We present a novel technique to
analyze the connection between optional concerns and
the potential sources of execution bloat induced by
them. Our analysis automatically answers questions such
as (1) whether a given set of optional concerns could
lead to execution bloat and (2) which particular
statements are the likely sources of bloat when those
concerns are not required. The technique combines
coarse grain concern input from an external source with
a fine-grained static analysis. Our experimental
evaluation highlights the effectiveness of such concern
augmented program analysis in execution bloat
assessment of ten programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhang:2013:IMF,
author = "Lingming Zhang and Lu Zhang and Sarfraz Khurshid",
title = "Injecting mechanical faults to localize developer
faults for evolving software",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "765--784",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509551",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "This paper presents a novel methodology for localizing
faults in code as it evolves. Our insight is that the
essence of failure-inducing edits made by the developer
can be captured using mechanical program
transformations (e.g., mutation changes). Based on the
insight, we present the FIFL framework, which uses both
the spectrum information of edits (obtained using the
existing FaultTracer approach) as well as the potential
impacts of edits (simulated by mutation changes) to
achieve more accurate fault localization. We evaluate
FIFL on real-world repositories of nine Java projects
ranging from 5.7KLoC to 88.8KLoC. The experimental
results show that FIFL is able to outperform the
state-of-the-art FaultTracer technique for localizing
failure-inducing program edits significantly. For
example, all 19 FIFL strategies that use both the
spectrum information and simulated impact information
for each edit outperform the existing FaultTracer
approach statistically at the significance level of
0.01. In addition, FIFL with its default settings
outperforms FaultTracer by 2.33\% to 86.26\% on 16 of
the 26 studied version pairs, and is inferior to
FaultTracer on only one version pair.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Deng:2013:ECB,
author = "Dongdong Deng and Wei Zhang and Shan Lu",
title = "Efficient concurrency-bug detection across inputs",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "785--802",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "In the multi-core era, it is critical to efficiently
test multi-threaded software and expose concurrency
bugs before software release. Previous work has made
significant progress in detecting and validating
concurrency bugs under a given input. Unfortunately,
software testing always faces large sets of test
inputs, and existing techniques are still too expensive
to be applied to every test input in practice. In this
paper, we use open-source software to study how
existing concurrency-bug detection tools work for a set
of inputs. The study shows that an interleaving
pattern, such as a data race or an atomicity violation,
can often be exposed by many inputs. Consequently,
existing bug detectors would inevitably waste their
bug-detection effort generating duplicate bug reports
when applied to a set of inputs. Guided by the above
study, we propose a coverage metric, Concurrent
Function Pairs (CFP), to efficiently approximate how
interleavings overlap across inputs. Using CFP, we have
designed a new approach to detecting data races and
atomicity-violation bugs for a set of inputs. Our
evaluation on open-source C/C++ applications shows that
our CFP-guided approach can effectively accelerate
concurrency-bug detection for a set of inputs by
reducing redundant detection effort across inputs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhong:2013:DAD,
author = "Hao Zhong and Zhendong Su",
title = "Detecting {API} documentation errors",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "803--816",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509523",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "When programmers encounter an unfamiliar API library,
they often need to refer to its documentation,
tutorials, or discussions on development forums to
learn its proper usage. These API documents contain
valuable information, but may also mislead programmers
as they may contain errors (e.g., broken code names
and obsolete code samples). Although most API documents
are actively maintained and updated, studies show that
many new and latent errors do exist. It is tedious and
error-prone to find such errors manually as API
documents can be enormous with thousands of pages.
Existing tools are ineffective in locating
documentation errors because traditional natural
language (NL) tools do not understand code names and
code samples, and traditional code analysis tools do
not understand NL sentences. In this paper, we propose
the first approach, DOCREF, specifically designed and
developed to detect API documentation errors. We
formulate a class of inconsistencies to indicate
potential documentation errors, and combine NL and code
analysis techniques to detect and report such
inconsistencies. We have implemented DOCREF and
evaluated its effectiveness on the latest
documentation of five widely-used API libraries.
DOCREF has detected more than 1,000 new documentation
errors, which we have reported to the authors. Many of
the errors have already been confirmed and fixed, after
we reported them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bao:2013:FDI,
author = "Tao Bao and Xiangyu Zhang",
title = "On-the-fly detection of instability problems in
floating-point program execution",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "817--832",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509526",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "The machine representation of floating point values
has limited precision such that errors may be
introduced during execution. These errors may be
propagated and magnified by subsequent operations,
leading to instability problems, e.g., control flow
path may be undesirably altered and faulty output may
be emitted. In this paper, we develop an efficient
on-the-fly monitoring technique that can predict if an
execution is stable. The technique does not explicitly
compute errors as doing so incurs high overhead.
Instead, it detects possible places where an error
becomes substantially inflated relative to the
corresponding value, and then tags the value with one
bit to denote that it has an inflated error. It then
tracks inflation bit propagation, taking care of
operations that may cut off such propagation. It
reports instability if any inflation bit reaches a
critical execution point, such as a predicate, where
the inflated error may induce substantial execution
difference, such as different execution paths. Our
experiment shows that with appropriate thresholds, the
technique can correctly detect that over 99.999996\% of
the inputs of all the programs we studied are stable
while a traditional technique relying solely on
inflation detection mistakenly classifies the majority of
the inputs as unstable for some of the programs.
Compared to the state-of-the-art technique that is
based on high precision computation and causes several
hundred times slowdown, our technique only causes 7.91
times slowdown on average and can report all the true
unstable executions with the appropriate thresholds.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Coons:2013:BPO,
author = "Katherine E. Coons and Madan Musuvathi and Kathryn S.
McKinley",
title = "Bounded partial-order reduction",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "833--848",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509556",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "Eliminating concurrency errors is increasingly
important as systems rely more on parallelism for
performance. Exhaustively exploring the state-space of
a program's thread interleavings finds concurrency
errors and provides coverage guarantees, but suffers
from exponential state-space explosion. Two prior
approaches alleviate state-space explosion. (1) Dynamic
partial-order reduction (DPOR) provides full coverage
and explores only one interleaving of independent
transitions. (2) Bounded search provides bounded
coverage by enumerating interleavings that do not
exceed a bound. In particular, we focus on
preemption-bounding. Combining partial-order reduction
with preemption-bounding had remained an open problem.
We show that preemption-bounded search explores the
same partial orders repeatedly and consequently
explores more executions than unbounded DPOR, even for
small bounds. We further show that if DPOR simply uses
the preemption bound to prune the state space as it
explores new partial orders, it misses parts of the
state space reachable in the bound and is therefore
unsound. The bound essentially induces dependences
between otherwise independent transitions in the DPOR
state space. We introduce Bounded Partial Order
Reduction (BPOR), a modification of DPOR that
compensates for bound dependences. We identify
properties that determine how well bounds combine with
partial-order reduction. We prove sound coverage and
empirically evaluate BPOR with preemption and fairness
bounds. We show that by eliminating redundancies, BPOR
significantly reduces testing time compared to bounded
search. BPOR's faster incremental guarantees will help
testers verify larger concurrent programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Mitchell:2013:FCP,
author = "Nick Mitchell and Peter F. Sweeney",
title = "On-the-fly capacity planning",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "849--866",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509540",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "When resolving performance problems, a simple
histogram of hot call stacks does not cut it,
especially given the highly fluid nature of modern
deployments. Why bother tuning, when adding a few CPUs
via the management console will quickly resolve the
problem? The findings of these tools are also presented
without any sense of context: e.g., string conversion
may be expensive, but only matters if it contributes
greatly to the response time of user logins.
Historically, these concerns have been the purview of
capacity planning. The power of planners lies in their
ability to weigh demand versus capacity, and to do so
in terms of the important units of work in the
application (such as user logins). Unfortunately, they
rely on measurements of rates and latencies, and both
quantities are difficult to obtain. Even if possible,
when all is said and done, these planners only relate
to the code as a black-box: but, why bother adding
CPUs, when easy code changes will fix the problem? We
present a way to do planning on-the-fly: with a few
call stack samples taken from an already-running
system, we predict the benefit of a proposed tuning
plan. We accomplish this by simulating the effect of a
tuning action upon execution speed and the way it
shifts resource demand. To identify existing problems,
we show how to generate tuning actions automatically,
guided by the desire to maximize speedup without
needless expense, and that these generated plans may
span resource and code changes. We show that it is
possible to infer everything needed from these samples
alone: levels of resource demand and the units of work
in the application. We evaluate our planner on a suite
of microbenchmarks and a suite of 15,000 data sets that
come from real applications running in the wild.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Vafeiadis:2013:RSL,
author = "Viktor Vafeiadis and Chinmay Narayan",
title = "Relaxed separation logic: a program logic for {C11}
concurrency",
journal = j-SIGPLAN,
volume = "48",
number = "10",
pages = "867--884",
month = oct,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2544173.2509532",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 09:19:33 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "OOPSLA '13 conference proceedings.",
abstract = "We introduce relaxed separation logic (RSL), the first
program logic for reasoning about concurrent programs
running under the C11 relaxed memory model. From a
user's perspective, RSL is an extension of concurrent
separation logic (CSL) with proof rules for the various
kinds of C11 atomic accesses. As in CSL, individual
threads are allowed to access non-atomically only the
memory that they own, thus preventing data races.
Ownership can, however, be transferred via certain
atomic accesses. For SC-atomic accesses, we permit
arbitrary ownership transfer; for acquire/release
atomic accesses, we allow ownership transfer only in
one direction; whereas for relaxed atomic accesses, we
rule out ownership transfer completely. We illustrate
RSL with a few simple examples and prove its soundness
directly over the axiomatic C11 weak memory model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Petrank:2013:SFA,
author = "Erez Petrank",
title = "Safety-first approach to memory consistency models",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "1--2",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2466479",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Reames:2013:THC,
author = "Philip Reames and George Necula",
title = "Towards hinted collection: annotations for decreasing
garbage collector pause times",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "3--14",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2464158",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "Garbage collection is widely used and has largely been
a boon for programmer productivity. However,
traditional garbage collection is approaching both
practical and theoretical performance limits. In
practice, the maximum heap size and heap structure of
large applications are influenced as much by garbage
collector behavior as by resource availability. We
present an alternate approach to garbage collection
wherein the programmer provides untrusted deallocation
hints. Usage of deallocation hints is similar to
trusted manual deallocation, but the consequence of an
inaccurate hint is lost performance, not correctness.
Our hinted collector algorithm uses these hints to
identify a subset of unreachable objects with both
better parallel asymptotic complexity and practical
performance. On some benchmarks, our prototype
collector implementation achieves 10--20\% pause time
reductions. We close with a discussion of the design
trade-offs inherent in our approach and lessons to be
learned from our collector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Morikawa:2013:ASR,
author = "Kazuya Morikawa and Tomoharu Ugawa and Hideya
Iwasaki",
title = "Adaptive scanning reduces sweep time for the {Lisp2}
mark-compact garbage collector",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "15--26",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2466480",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "Mark-compact garbage collection helps long-running
programs avoid fragmentation. The Lisp2 mark-compact
collector is a classic but still widely-used compaction
algorithm. It sequentially scans the entire heap to
compact all live objects at one end of the heap while
preserving their order of addresses. Since the heap is
generally large, this scanning takes a long time.
Although some collectors adopt a separate bitmap into
which mark bits of objects are stored to reduce the
scanning time, we observed that scanning the bitmap can
take longer than scanning the heap if objects are
densely located. We propose a new scanning method from
this observation, which adaptively alternates methods
of scanning depending on heap usage; it scans those
parts of the heap where live objects are densely
located whereas it scans the bitmap for the remaining
parts. We implemented this scanning method in the Lisp2
collector of Jikes RVM. The experimental results
revealed that the adaptive scanner scanned faster than
the method that only scanned the heap and the method
that only scanned the bitmap.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{White:2013:CTP,
author = "David R. White and Jeremy Singer and Jonathan M.
Aitken and Richard E. Jones",
title = "Control theory for principled heap sizing",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "27--38",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2466481",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "ISMM '13 conference proceedings.",
abstract = "We propose a new, principled approach to adaptive heap
sizing based on control theory. We review current
state-of-the-art heap sizing mechanisms, as deployed in
Jikes RVM and HotSpot. We then formulate heap sizing as
a control problem, apply and tune a standard controller
algorithm, and evaluate its performance on a set of
well-known benchmarks. We find our controller adapts
the heap size more responsively than existing
mechanisms. This responsiveness allows tighter virtual
machine memory footprints while preserving target
application throughput, which is ideal for both
embedded and utility computing domains. In short, we
argue that formal, systematic approaches to memory
management should be replacing ad-hoc heuristics as the
discipline matures. Control-theoretic heap sizing is
one such systematic approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Brock:2013:PPA,
author = "Jacob Brock and Xiaoming Gu and Bin Bao and Chen
Ding",
title = "{Pacman}: program-assisted cache management",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "39--50",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2466482",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "As caches become larger and shared by an increasing
number of cores, cache management is becoming more
important. This paper explores collaborative caching,
which uses software hints to influence hardware
caching. Recent studies have shown that such
collaboration between software and hardware can
theoretically achieve optimal cache replacement on
LRU-like cache. This paper presents Pacman, a practical
solution for collaborative caching in loop-based code.
Pacman uses profiling to analyze patterns in an optimal
caching policy in order to determine which data to
cache and at what time. It then splits each loop into
different parts at compile time. At run time, the loop
boundary is adjusted to selectively store data that
would be stored in an optimal policy. In this way,
Pacman emulates the optimal policy wherever it can.
Pacman requires only a single hint bit on load and
store instructions, for which some current hardware
provides partial support. This paper presents results using both
simulated and real systems, and compares simulated
results to related caching policies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wang:2013:GSE,
author = "Yan Wang and Iulian Neamtiu and Rajiv Gupta",
title = "Generating sound and effective memory debuggers",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "51--62",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2464159",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "We present a new approach for constructing debuggers
based on declarative specification of bug conditions
and root causes, and automatic generation of debugger
code. We illustrate our approach on several classes of
bugs, memory or otherwise. For each bug class, bug
conditions and their root cause are specified
declaratively, in first-order logic, using 1 to 4
predicates. We employ a low-level operational semantics
and abstract traces to permit concise bug specification
and prove soundness. To facilitate locating bugs, we
introduce a new concept of value propagation chains
that reduce programmer burden by narrowing the fault to
a handful of executed instructions (1 to 16 in our
experiments). We employ automatic translation to
generate the debugger implementation, which runs on top
of the Pin infrastructure. Experiments with using our
system on 7 versions of 4 real-world programs show that
our approach is expressive, effective at finding bugs
and their causes, and efficient. We believe that, using
our approach, other kinds of declaratively-specified,
provably-correct, auto-generated debuggers can be
constructed with little effort.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
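To convey the flavor of declaratively specified bug conditions in the
entry above (the paper states them in first-order logic over abstract
traces; the event type and the double-free predicate below are
hypothetical simplifications, not the paper's specifications):

    -- Hypothetical simplification: a double free as a predicate over
    -- an execution trace of allocation events.
    type Addr = Int
    data Event = Malloc Addr | Free Addr deriving (Eq, Show)

    -- An address is double-freed if it is freed twice with no
    -- intervening reallocation of that address.
    doubleFree :: [Event] -> Bool
    doubleFree = go []
      where
        go _ [] = False
        go freed (Free a : es)
          | a `elem` freed       = True
          | otherwise            = go (a : freed) es
        go freed (Malloc a : es) = go (filter (/= a) freed) es

    main :: IO ()
    main = print (doubleFree [Malloc 1, Free 1, Free 1])  -- True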
@Article{Kalibera:2013:RBR,
author = "Tomas Kalibera and Richard Jones",
title = "Rigorous benchmarking in reasonable time",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "63--74",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2464160",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "ISMM '13 conference proceedings.",
abstract = "Experimental evaluation is key to systems research.
Because modern systems are complex and
non-deterministic, good experimental methodology
demands that researchers account for uncertainty. To
obtain valid results, they are expected to run many
iterations of benchmarks, invoke virtual machines (VMs)
several times, or even rebuild VM or benchmark binaries
more than once. All this repetition costs time to
complete experiments. Currently, many evaluations give
up on sufficient repetition or rigorous statistical
methods, or even run benchmarks only in training sizes.
The results reported often lack proper variation
estimates and, when a small difference between two
systems is reported, some are simply unreliable. In
contrast, we provide a statistically rigorous
methodology for repetition and summarising results that
makes efficient use of experimentation time. Time
efficiency comes from two key observations. First, a
given benchmark on a given platform is typically prone
to much less non-determinism than the common worst-case
of published corner-case studies. Second, repetition is
most needed where most uncertainty arises (whether
between builds, between executions or between
iterations). We capture experimentation cost with a
novel mathematical model, which we use to identify the
number of repetitions at each level of an experiment
necessary and sufficient to obtain a given level of
precision. We present our methodology as a cookbook
that guides researchers on the number of repetitions
they should run to obtain reliable results. We also
show how to present results with an effect size
confidence interval. As an example, we show how to use
our methodology to conduct throughput experiments with
the DaCapo and SPEC CPU benchmarks on three recent
platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
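As orientation for the entry above and nothing more (the paper's
multi-level model over builds, executions, and iterations is
considerably richer): the simplest ingredient of reporting with
confidence intervals, a normal-approximation interval for a sample
mean, in Haskell:

    -- Sample mean with a ~95% normal-approximation confidence
    -- interval (needs at least two samples).
    meanCI95 :: [Double] -> (Double, Double, Double)
    meanCI95 xs = (m, m - h, m + h)
      where
        n   = fromIntegral (length xs)
        m   = sum xs / n
        var = sum [ (x - m) ^ (2 :: Int) | x <- xs ] / (n - 1)
        h   = 1.96 * sqrt (var / n)

    main :: IO ()
    main = print (meanCI95 [10.2, 9.8, 10.1, 10.4, 9.9])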
@Article{Aigner:2013:ATU,
author = "Martin Aigner and Christoph M. Kirsch",
title = "{ACDC}: towards a universal mutator for benchmarking
heap management systems",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "75--84",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2464161",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "We present ACDC, an open-source benchmark that may be
configured to emulate explicit single- and
multi-threaded memory allocation, sharing, access, and
deallocation behavior to expose virtually any relevant
allocator performance differences. ACDC mimics periodic
memory allocation and deallocation (AC) as well as
persistent memory (DC). Memory may be allocated
thread-locally and shared among multiple threads to
study multicore scalability and even false sharing.
Memory may be deallocated by threads other than the
allocating threads to study blowup memory
fragmentation. Memory may be accessed and deallocated
sequentially in allocation order or in tree-like
traversals to expose allocator deficiencies in
exploiting spatial locality. We demonstrate ACDC's
capabilities with seven state-of-the-art allocators for
C/C++ in an empirical study which also reveals
interesting performance differences between the
allocators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Li:2013:PSC,
author = "Lian Li and Cristina Cifuentes and Nathan Keynes",
title = "Precise and scalable context-sensitive pointer
analysis via value flow graph",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "85--96",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2466483",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "In this paper, we propose a novel method for
context-sensitive pointer analysis using the value flow
graph (VFG) formulation. We achieve context-sensitivity
by simultaneously applying function cloning and
computing context-free language reachability
(CFL-reachability) in a novel way. In contrast to
existing clone-based and CFL-based approaches,
flow-sensitivity is easily integrated in our approach
by using a flow-sensitive VFG where each value flow
edge is computed in a flow-sensitive manner. We apply
context-sensitivity to both local variables and heap
objects and propose a new approximation for heap
cloning. We prove that our approach can achieve
context-sensitivity without loss of precision, i.e., it
is as precise as inlining all function calls. We
develop an efficient algorithm and implement a
context-, flow-, and field-sensitive pointer analysis
with heap cloning support in LLVM. We evaluate the
efficiency and precision of our implementation using
standard SPEC CPU2006 benchmarks. Our experimental
results show that the analysis is much faster than
existing approaches, it scales well to large real-world
applications, and it enables more effective compiler
optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ravitch:2013:AMO,
author = "Tristan Ravitch and Ben Liblit",
title = "Analyzing memory ownership patterns in {C} libraries",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "97--108",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2464162",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "Programs written in multiple languages are known as
polyglot programs. In part due to the proliferation of
new and productive high-level programming languages,
these programs are becoming more common in environments
that must interoperate with existing systems. Polyglot
programs must manage resource lifetimes across language
boundaries. Resource lifetime management bugs can lead
to leaks and crashes, which are more difficult to debug
in polyglot programs than monoglot programs. We present
analyses to automatically infer the ownership semantics
of C libraries. The results of these analyses can be
used to generate bindings to C libraries that
intelligently manage resources, to check the
correctness of polyglot programs, and to document the
interfaces of C libraries. While these analyses are
unsound and incomplete, we demonstrate that they
significantly reduce the manual annotation burden for a
suite of fifteen open source libraries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ricci:2013:ETP,
author = "Nathan P. Ricci and Samuel Z. Guyer and J. Eliot B.
Moss",
title = "{Elephant Tracks}: portable production of complete and
precise {GC} traces",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "109--118",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2466484",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "We present Elephant Tracks (ET), a dynamic program
analysis tool for Java that produces detailed traces of
garbage collection-related events, including object
allocations, object deaths, and pointer updates. Like
prior work, our tracing tool is based on the Merlin
algorithm [6,7], but offers several substantial new
capabilities. First, it is much more precise than
previous tools: it traces method entries and exits and
measures time in terms of them, allowing it to place
events precisely in the context of the program
structure. Second, it is implemented using a
combination of JVM Tool Interface (JVMTI)[13] callbacks
and bytecode rewriting, and works with any standard
JVM. Finally, it produces complete traces, including
weak references, events from the Java Native Interface
and sun.misc.Unsafe, and VM start up objects. In this
paper we also explore the general design space of
tracing tools, and carefully define the execution model
that the traces represent.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bu:2013:BAD,
author = "Yingyi Bu and Vinayak Borkar and Guoqing Xu and
Michael J. Carey",
title = "A bloat-aware design for big data applications",
journal = j-SIGPLAN,
volume = "48",
number = "11",
pages = "119--130",
month = nov,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2555670.2466485",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 9 08:04:34 MST 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "ISMM '13 conference proceedings.",
abstract = "Over the past decade, the increasing demands on
data-driven business intelligence have led to the
proliferation of large-scale, data-intensive
applications that often have huge amounts of data
(often at terabyte or petabyte scale) to process. An
object-oriented programming language such as Java is
often the developer's choice for implementing such
applications, primarily due to its quick development
cycle and rich community resource. While the use of
such languages makes programming easier, significant
performance problems can often be seen --- the
combination of the inefficiencies inherent in a managed
run-time system and the impact of the huge amount of
data to be processed in the limited memory space often
leads to memory bloat and performance degradation at a
surprisingly early stage. This paper proposes a
bloat-aware design paradigm towards the development of
efficient and scalable Big Data applications in
object-oriented GC enabled languages. To motivate this
work, we first perform a study on the impact of several
typical memory bloat patterns. These patterns are
summarized from the user complaints on the mailing
lists of two widely-used open-source Big Data
applications. Next, we discuss our design paradigm to
eliminate bloat. Using examples and real-world
experience, we demonstrate that programming under this
paradigm does not incur significant programming burden.
We have implemented a few common data processing tasks
both using this design and using the conventional
object-oriented design. Our experimental results show
that this new design paradigm is extremely effective in
improving performance --- even for the moderate-size
data sets processed, we have observed 2.5x+ performance
gains, and the improvement grows substantially with the
size of the data set.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ankner:2013:EAH,
author = "Johan Ankner and Josef David Svenningsson",
title = "An {EDSL} approach to high performance {Haskell}
programming",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "1--12",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503789",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "This paper argues for a new methodology for writing
high performance Haskell programs by using Embedded
Domain Specific Languages. We exemplify the methodology
by describing a complete library, meta-repa, which is a
reimplementation of parts of the repa library. The
paper describes the implementation of meta-repa and
contrasts it with the standard approach to writing high
performance libraries. We conclude that even though the
embedded language approach has an initial cost of
defining the language and some syntactic overhead it
gives a more tailored programming model, stronger
performance guarantees, better control over
optimizations, simpler implementation of fusion and
inlining and allows for moving type level programming
down to value level programming in some cases. We also
provide benchmarks showing that meta-repa is as fast,
or faster, than repa. Furthermore, meta-repa also
includes push arrays and we demonstrate their
usefulness for writing certain high performance kernels
such as FFT.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
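A generic illustration of the embedded-language methodology in the
entry above, not meta-repa's actual representation: a toy deeply
embedded expression language whose programs are syntax trees, so the
library author controls optimization and evaluation. A real EDSL such
as meta-repa compiles such trees to efficient code rather than
interpreting them.

    -- Toy deep embedding: programs are first-class syntax trees.
    data Expr
      = Lit Double
      | Add Expr Expr
      | Mul Expr Expr
      deriving Show

    eval :: Expr -> Double
    eval (Lit x)   = x
    eval (Add a b) = eval a + eval b
    eval (Mul a b) = eval a * eval b

    -- One benefit: optimizations are ordinary functions over the tree.
    simplify :: Expr -> Expr
    simplify (Mul (Lit 1) e) = simplify e
    simplify (Add a b)       = Add (simplify a) (simplify b)
    simplify (Mul a b)       = Mul (simplify a) (simplify b)
    simplify e               = e

    main :: IO ()
    main = print (eval (simplify (Mul (Lit 1) (Add (Lit 2) (Lit 3)))))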
@Article{Bernardy:2013:NFP,
author = "Jean-Philippe Bernardy and Nicolas Pouillard",
title = "Names for free: polymorphic views of names and
binders",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "13--24",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503780",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "We propose a novel technique to represent names and
binders in Haskell. The dynamic (run-time)
representation is based on de Bruijn indices, but it
features an interface to write and manipulate variables
conveniently, using Haskell-level lambdas and
variables. The key idea is to use rich types: a subterm
with an additional free variable is viewed either as $
\forall \nu . \nu \to {\rm Term}(a + \nu) $ or $
\exists \nu . \nu \times {\rm Term}(a + \nu) $ depending
on whether it is constructed or analysed. We
demonstrate on a number of examples how this approach
makes it possible to express term construction and
manipulation in a natural way, while retaining the good properties
of representations based on de Bruijn indices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
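A sketch of the construction side of the idea in the entry above,
with Haskell's Either standing in for the paper's sum on contexts (an
assumption of this sketch; the paper also develops the existential,
analysis-side view):

    {-# LANGUAGE RankNTypes #-}
    -- A subterm with one extra free variable is built with a
    -- polymorphic function, so the new variable is an ordinary
    -- Haskell-level name; Left/Right injections track the scopes.
    data Term a
      = Var a
      | App (Term a) (Term a)
      | Lam (forall v. v -> Term (Either a v))

    -- \x -> x
    identity :: Term a
    identity = Lam (\x -> Var (Right x))

    -- \x -> \y -> x
    konst :: Term a
    konst = Lam (\x -> Lam (\_y -> Var (Left (Right x))))

    -- number of syntax nodes, just to have something executable;
    -- note the polymorphic recursion through the binder
    size :: Term a -> Int
    size (Var _)   = 1
    size (App f x) = 1 + size f + size x
    size (Lam b)   = 1 + size (b ())

    main :: IO ()
    main = print (size konst)  -- 3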
@Article{Bird:2013:UIT,
author = "Richard Bird and Jeremy Gibbons and Stefan Mehner and
Janis Voigtl{\"a}nder and Tom Schrijvers",
title = "Understanding idiomatic traversals backwards and
forwards",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "25--36",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503781",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "We present new ways of reasoning about a particular
class of effectful Haskell programs, namely those
expressed as idiomatic traversals. Starting out with a
specific problem about labelling and unlabelling binary
trees, we extract a general inversion law, applicable
to any monad, relating a traversal over the elements of
an arbitrary traversable type to a traversal that goes
in the opposite direction. This law can be invoked to
show that, in a suitable sense, unlabelling is the
inverse of labelling. The inversion law, as well as a
number of other properties of idiomatic traversals, is
a corollary of a more general theorem characterising
traversable functors as finitary containers: an
arbitrary traversable object can be decomposed uniquely
into shape and contents, and traversal be understood in
terms of those. Proof of the theorem involves the
properties of traversal in a special idiom related to
the free applicative functor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
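The running example of the entry above in miniature: labelling a
binary tree is a single idiomatic traversal. The counting applicative
below is a minimal stand-in for the usual State idiom, so the sketch
needs no extra packages:

    {-# LANGUAGE DeriveFunctor, DeriveFoldable, DeriveTraversable #-}
    data Tree a = Leaf a | Node (Tree a) (Tree a)
      deriving (Show, Functor, Foldable, Traversable)

    -- a minimal counting idiom (applicative functor)
    newtype Counter a = Counter { runCounter :: Int -> (a, Int) }

    instance Functor Counter where
      fmap f (Counter g) = Counter (\n -> let (a, n') = g n in (f a, n'))

    instance Applicative Counter where
      pure a = Counter (\n -> (a, n))
      Counter f <*> Counter g = Counter (\n ->
        let (h, n')  = f n
            (a, n'') = g n'
        in (h a, n''))

    tick :: Counter Int
    tick = Counter (\n -> (n, n + 1))

    -- labelling is one idiomatic traversal
    label :: Tree a -> Tree (a, Int)
    label t = fst (runCounter (traverse (\x -> fmap ((,) x) tick) t) 0)

    main :: IO ()
    main = print (label (Node (Leaf 'a') (Node (Leaf 'b') (Leaf 'c'))))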
@Article{Blazevic:2013:ASM,
author = "Mario Bla{\v{z}}evi{\'c}",
title = "Adding structure to monoids: thus hopefully ending
{Haskell}'s string type confusion",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "37--46",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503785",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "This paper presents the rationale and design of
monoid-subclasses. This Haskell library consists of a
collection of type classes that generalize the
interface of several common data types, most
importantly those used to represent strings. We
demonstrate that the mathematical theory behind
monoid-subclasses can bring substantial practical
benefits to the Haskell library ecosystem by
generalizing attoparsec, one of the most popular
Haskell parsing libraries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Claessen:2013:SPN,
author = "Koen Claessen and Michal H. Palka",
title = "Splittable pseudorandom number generators using
cryptographic hashing",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "47--58",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503784",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/prng.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "We propose a new splittable pseudorandom number
generator (PRNG) based on a cryptographic hash
function. Splittable PRNGs, in contrast to linear
PRNGs, allow the creation of two (seemingly)
independent generators from a given random number
generator. Splittable PRNGs are very useful for
structuring purely functional programs, as they avoid
the need for threading around state. We show that the
currently known and used splittable PRNGs are either
not efficient enough, have inherent flaws, or lack
formal arguments about their randomness. In contrast,
our proposed generator can be implemented efficiently,
and comes with formal statements and proofs that
quantify how 'random' the generated results are.
The provided proofs give strong randomness
guarantees under assumptions commonly made in
cryptography.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
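The interface at issue in the entry above, with a deliberately naive
toy instance: a generator is a path in an infinite binary tree, and
output hashes the path. The 'mix' function is a stand-in and
emphatically not the cryptographic hash the paper requires:

    -- split one generator into two (seemingly) independent ones
    class Splittable g where
      split :: g -> (g, g)
      out   :: g -> Int      -- extract a pseudorandom value

    -- toy instance: the tree position acts as the hash input
    newtype Path = Path [Bool]

    -- stand-in "hash": definitely not cryptographic
    mix :: [Bool] -> Int
    mix = foldl (\h b -> 0x45d9f3b * h + fromEnum b + 0x9e3779b9)
                0x2545F491

    instance Splittable Path where
      split (Path p) = (Path (False : p), Path (True : p))
      out   (Path p) = mix p

    main :: IO ()
    main = let (l, r) = split (Path [])
           in print (out l, out r)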
@Article{Kiselyov:2013:EEA,
author = "Oleg Kiselyov and Amr Sabry and Cameron Swords",
title = "Extensible effects: an alternative to monad
transformers",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "59--70",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503791",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "We design and implement a library that solves the
long-standing problem of combining effects without
imposing restrictions on their interactions (such as
static ordering). Effects arise from interactions
between a client and an effect handler (interpreter);
interactions may vary throughout the program and
dynamically adapt to execution conditions. Existing
code that relies on monad transformers may be used with
our library with minor changes, gaining efficiency over
long monad stacks. In addition, our library has greater
expressiveness, allowing for practical idioms that are
inefficient, cumbersome, or outright impossible with
monad transformers. Our alternative to a monad
transformer stack is a single monad, for the
coroutine-like communication of a client with its
handler. Its type reflects possible requests, i.e.,
possible effects of a computation. To support arbitrary
effects and their combinations, requests are values of
an extensible union type, which allows adding and,
notably, subtracting summands. Extending and, upon
handling, shrinking of the union of possible requests
is reflected in its type, yielding a type-and-effect
system for Haskell. The library is lightweight,
generalizing the extensible exception handling to other
effects and accurately tracking them in types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
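The open-union machinery of the entry above is too large to reproduce
here, but the client/handler (coroutine-like) flavor can be suggested
with an ordinary free monad over a single request functor. This is a
simplification, not the paper's construction, which supports adding
and, after handling, subtracting effects in the type:

    {-# LANGUAGE DeriveFunctor #-}
    -- Simplified flavor only: a free monad over one request functor.
    data Free f a = Pure a | Impure (f (Free f a))

    instance Functor f => Functor (Free f) where
      fmap g (Pure a)   = Pure (g a)
      fmap g (Impure m) = Impure (fmap (fmap g) m)

    instance Functor f => Applicative (Free f) where
      pure = Pure
      Pure g   <*> x = fmap g x
      Impure m <*> x = Impure (fmap (<*> x) m)

    instance Functor f => Monad (Free f) where
      Pure a   >>= k = k a
      Impure m >>= k = Impure (fmap (>>= k) m)

    -- A single effect: ask for a shared environment of type s.
    newtype Get s k = Get (s -> k) deriving Functor

    ask :: Free (Get s) s
    ask = Impure (Get Pure)

    -- The handler interprets requests, playing the coroutine partner
    -- that answers the client's questions.
    runReader :: s -> Free (Get s) a -> a
    runReader _ (Pure a)         = a
    runReader s (Impure (Get k)) = runReader s (k s)

    main :: IO ()
    main = print (runReader (10 :: Int)
                            (do x <- ask; y <- ask; pure (x + y)))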
@Article{Leslie-Hurd:2013:MVS,
author = "Joe Leslie-Hurd",
title = "Maintaining verified software",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "71--80",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503787",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "Maintaining software in the face of evolving
dependencies is a challenging problem, and in addition
to good release practices there is a need for automatic
dependency analysis tools to avoid errors creeping in.
Verified software reveals more semantic information in
the form of mechanized proofs of functional
specifications, and this can be used for dependency
analysis. In this paper we present a scheme for
automatic dependency analysis of verified software,
which for each program checks that the collection of
installed libraries is sufficient to guarantee its
functional correctness. We illustrate the scheme with a
case study of Haskell packages verified in higher order
logic. The dependency analysis reduces the burden of
maintaining verified Haskell packages by automatically
computing version ranges for the packages they depend
on, such that any combination provides the
functionality required for correct operation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lindley:2013:HPP,
author = "Sam Lindley and Conor McBride",
title = "{Hasochism}: the pleasure and pain of dependently
typed {Haskell} programming",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "81--92",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503786",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "Haskell's type system has outgrown its Hindley-Milner
roots to the extent that it now stretches to the basics
of dependently typed programming. In this paper, we
collate and classify techniques for programming with
dependent types in Haskell, and contribute some new
ones. In particular, through extended examples ---
merge-sort and rectangular tilings --- we show how to
exploit Haskell's constraint solver as a theorem
prover, delivering code which, as Agda programmers, we
envy. We explore the compromises involved in simulating
variations on the theme of the dependent function space
in an attempt to help programmers put dependent types
to work, and to inform the evolving language design
both of Haskell and of dependently typed languages more
broadly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
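The canonical specimen of the style the entry above collates, for
readers who have not seen it: type-level naturals index vector
lengths, and GHC checks the length arithmetic in append:

    {-# LANGUAGE DataKinds, GADTs, TypeFamilies, TypeOperators #-}
    data Nat = Z | S Nat

    data Vec (n :: Nat) a where
      VNil  :: Vec 'Z a
      VCons :: a -> Vec n a -> Vec ('S n) a

    type family (m :: Nat) :+ (n :: Nat) :: Nat where
      'Z   :+ n = n
      'S m :+ n = 'S (m :+ n)

    -- appending an m-vector to an n-vector provably yields an
    -- (m + n)-vector
    vappend :: Vec m a -> Vec n a -> Vec (m :+ n) a
    vappend VNil         ys = ys
    vappend (VCons x xs) ys = VCons x (vappend xs ys)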
@Article{Lippmeier:2013:DFF,
author = "Ben Lippmeier and Manuel M. T. Chakravarty and
Gabriele Keller and Amos Robinson",
title = "Data flow fusion with series expressions in
{Haskell}",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "93--104",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503782",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "Existing approaches to array fusion can deal with
straight-line producer consumer pipelines, but cannot
fuse branching data flows where a generated array is
consumed by several different consumers. Branching data
flows are common and natural to write, but a lack of
fusion leads to the creation of an intermediate array
at every branch point. We present a new array fusion
system that handles branches, based on Waters's series
expression framework, but extended to work in a
functional setting. Our system also solves a related
problem in stream fusion, namely the introduction of
duplicate loop counters. We demonstrate speedup over
existing fusion systems for several key examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liu:2013:ILH,
author = "Hai Liu and Neal Glew and Leaf Petersen and Todd A.
Anderson",
title = "The {Intel} labs {Haskell} research compiler",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "105--116",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503779",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "The Glasgow Haskell Compiler (GHC) is a well supported
optimizing compiler for the Haskell programming
language, along with its own extensions to the language
and libraries. Haskell's lazy semantics imposes a
runtime model which is in general difficult to
implement efficiently. GHC achieves good performance
across a wide variety of programs via aggressive
optimization taking advantage of the lack of side
effects, and by targeting a carefully tuned virtual
machine. The Intel Labs Haskell Research Compiler uses
GHC as a frontend, but provides a new whole-program
optimizing backend by compiling the GHC intermediate
representation to a relatively generic functional
language compilation platform. We found that GHC's
external Core language was relatively easy to use, but
reusing GHC's libraries and achieving full
compatibility were harder. For certain classes of
programs, our platform provides substantial performance
benefits over GHC alone, performing $ 2 \times $ faster
than GHC with the LLVM backend on selected modern
performance-oriented benchmarks; for other classes of
programs, the benefits of GHC's tuned virtual machine
continue to outweigh the benefits of more aggressive
whole program optimization. Overall we achieve parity
with GHC with the LLVM backend. In this paper, we
describe our Haskell compiler stack, its implementation
and optimization approach, and present benchmark
results comparing it to GHC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{vanderPloeg:2013:MFR,
author = "Atze van der Ploeg",
title = "Monadic functional reactive programming",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "117--128",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503783",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "Functional Reactive Programming (FRP) is a way to
program reactive systems in functional style,
eliminating many of the problems that arise from
imperative techniques. In this paper, we present an
alternative FRP formulation that is based on the notion
of a reactive computation: a monadic computation which
may require the occurrence of external events to
continue. A signal computation is a reactive
computation that may also emit values. In contrast to
signals in other FRP formulations, signal computations
can end, leading to a monadic interface for sequencing
signal phases. This interface has several advantages:
routing is implicit, sequencing signal phases is easier
and more intuitive than when using the switching
combinators found in other FRP approaches, and dynamic
lists require much less boilerplate code. In other FRP
approaches, either the entire FRP expression is
re-evaluated on each external stimulus, or impure
techniques are used to prevent redundant
re-computations. We show how Monadic FRP can be
implemented straightforwardly in a purely functional
way while preventing redundant re-computations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
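A minimal rendering of the central notion of the entry above: a
reactive computation either is finished or awaits the next external
event, and sequencing phases is ordinary monadic bind. (The paper's
actual interface is richer, e.g. signal computations that also emit
values; the driver and example below are this sketch's own.)

    data React e a = Done a | Await (e -> React e a)

    instance Functor (React e) where
      fmap f (Done a)  = Done (f a)
      fmap f (Await k) = Await (fmap f . k)

    instance Applicative (React e) where
      pure = Done
      Done f  <*> r = fmap f r
      Await k <*> r = Await ((<*> r) . k)

    -- sequencing reactive phases is ordinary monadic bind
    instance Monad (React e) where
      Done a  >>= f = f a
      Await k >>= f = Await ((>>= f) . k)

    -- wait for the first event satisfying a predicate
    waitFor :: (e -> Bool) -> React e e
    waitFor p = Await (\e -> if p e then Done e else waitFor p)

    -- drive a reactive computation with a list of events
    run :: React e a -> [e] -> Maybe a
    run (Done a)  _        = Just a
    run (Await _) []       = Nothing
    run (Await k) (e : es) = run (k e) es

    main :: IO ()
    main = print (run (do x <- waitFor (> 2)
                          y <- waitFor even
                          pure (x + y))
                      [1, 3, 5, 4 :: Int])  -- Just 7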
@Article{Voellmy:2013:MHP,
author = "Andreas Richard Voellmy and Junchang Wang and Paul
Hudak and Kazuhiko Yamamoto",
title = "{Mio}: a high-performance multicore {IO} manager for
{GHC}",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "129--140",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503790",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "Haskell threads provide a key, lightweight concurrency
abstraction to simplify the programming of important
network applications such as web servers and
software-defined network (SDN) controllers. The
flagship Glasgow Haskell Compiler (GHC) introduces a
run-time system (RTS) to achieve a high-performance
multicore implementation of Haskell threads, by
introducing effective components such as a multicore
scheduler, a parallel garbage collector, an IO manager,
and efficient multicore memory allocation. Evaluations
of the GHC RTS, however, show that it does not scale
well on multicore processors, leading to poor
performance of many network applications that try to
use lightweight Haskell threads. In this paper, we show
that the GHC IO manager, which is a crucial component
of the GHC RTS, is the scaling bottleneck. Through a
series of experiments, we identify key data structure,
scheduling, and dispatching bottlenecks of the GHC IO
manager. We then design a new multicore IO manager
named Mio that eliminates all these bottlenecks. Our
evaluations show that the new Mio manager improves
realistic web server throughput by 6.5x and reduces
expected web server response time by 5.7x. We also show
that with Mio, McNettle (an SDN controller written in
Haskell) can scale effectively to 40+ cores, reach a
throughput of over 20 million new requests per second
on a single machine, and hence become the fastest of
all existing SDN controllers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Wortmann:2013:COH,
author = "Peter M. Wortmann and David Duke",
title = "Causality of optimized {Haskell}: what is burning our
cycles?",
journal = j-SIGPLAN,
volume = "48",
number = "12",
pages = "141--152",
month = dec,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578854.2503788",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:55 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "Haskell '13 conference proceedings.",
abstract = "Profiling real-world Haskell programs is hard, as
compiler optimizations make it tricky to establish
causality between the source code and program behavior.
In this paper we attack the root issue by performing a
causality analysis of functional programs under
optimization. We apply our findings to build a novel
profiling infrastructure on top of the Glasgow Haskell
Compiler, allowing for performance analysis even of
aggressively optimized programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Birkedal:2014:MRA,
author = "Lars Birkedal",
title = "Modular reasoning about concurrent higher-order
imperative programs",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "1--1",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2537849",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cousot:2014:GCC,
author = "Patrick Cousot and Radhia Cousot",
title = "A {Galois} connection calculus for abstract
interpretation",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "3--4",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2537850",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We introduce a Galois connection calculus for language
independent specification of abstract interpretations
used in programming language semantics, formal
verification, and static analysis. This Galois
connection calculus and its type system are typed by
abstract interpretation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
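For reference alongside the entry above: a Galois connection between
a concrete poset $(C, \sqsubseteq)$ and an abstract poset
$(A, \preceq)$ is a pair of monotone maps $\alpha : C \to A$ and
$\gamma : A \to C$ such that
$$ \alpha(c) \preceq a \iff c \sqsubseteq \gamma(a)
   \qquad \hbox{for all } c \in C,\ a \in A, $$
i.e., $\alpha$ gives the best abstract approximation of each concrete
property and $\gamma$ gives the concrete meaning of each abstract one.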
@Article{Castagna:2014:PFS,
author = "Giuseppe Castagna and Kim Nguyen and Zhiwu Xu and
Hyeonseung Im and Sergue{\"\i} Lenglet and Luca
Padovani",
title = "Polymorphic functions with set-theoretic types: part
1: syntax, semantics, and evaluation",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "5--17",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535840",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "This article is the first part of a two-article
series about a calculus with higher-order polymorphic
functions, recursive types with arrow and product type
constructors and set-theoretic type connectives (union,
intersection, and negation). In this first part we
define and study the explicitly-typed version of the
calculus in which type instantiation is driven by
explicit instantiation annotations. In particular, we
define an explicitly-typed lambda-calculus with
intersection types and an efficient evaluation model
for it. In the second part, presented in a companion
paper, we define a local type inference system that
allows the programmer to omit explicit instantiation
annotations, and a type reconstruction system that
allows the programmer to omit explicit type
annotations. The work presented in the two articles
provides the theoretical foundations and technical
machinery needed to design and implement higher-order
polymorphic functional languages for semi-structured
data.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kilpatrick:2014:BRH,
author = "Scott Kilpatrick and Derek Dreyer and Simon Peyton
Jones and Simon Marlow",
title = "{Backpack}: retrofitting {Haskell} with interfaces",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "19--31",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535884",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Module systems like that of Haskell permit only a weak
form of modularity in which module implementations
depend directly on other implementations and must be
processed in dependency order. Module systems like that
of ML, on the other hand, permit a stronger form of
modularity in which explicit interfaces express
assumptions about dependencies, and each module can be
typechecked and reasoned about independently. In this
paper, we present Backpack, a new language for building
separately-typecheckable *packages* on top of a weak
module system like Haskell's. The design of Backpack is
inspired by the MixML module calculus of Rossberg and
Dreyer, but differs significantly in detail. Like
MixML, Backpack supports explicit interfaces and
recursive linking. Unlike MixML, Backpack supports a
more flexible applicative semantics of instantiation.
Moreover, its design is motivated less by foundational
concerns and more by the practical concern of
integration into Haskell, which has led us to advocate
simplicity --- in both the syntax and semantics of
Backpack --- over raw expressive power. The semantics
of Backpack packages is defined by elaboration to sets
of Haskell modules and binary interface files, thus
showing how Backpack maintains interoperability with
Haskell while extending it with separate typechecking.
Lastly, although Backpack is geared toward integration
into Haskell, its design and semantics are largely
agnostic with respect to the details of the underlying
core language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Casinghino:2014:CPP,
author = "Chris Casinghino and Vilhelm Sj{\"o}berg and Stephanie
Weirich",
title = "Combining proofs and programs in a dependently typed
language",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "33--45",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535883",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Most dependently-typed programming languages either
require that all expressions terminate (e.g. Coq, Agda,
and Epigram), or allow infinite loops but are
inconsistent when viewed as logics (e.g. Haskell, ATS,
$ \Omega $mega). Here, we combine these two approaches
into a single dependently-typed core language. The
language is composed of two fragments that share a
common syntax and overlapping semantics: a logic that
guarantees total correctness, and a call-by-value
programming language that guarantees type safety but
not termination. The two fragments may interact:
logical expressions may be used as programs; the logic
may soundly reason about potentially nonterminating
programs; programs can require logical proofs as
arguments; and ``mobile'' program values, including
proofs computed at runtime, may be used as evidence by
the logic. This language allows programmers to work
with total and partial functions uniformly, providing a
smooth path from functional programming to
dependently-typed programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dissegna:2014:TCA,
author = "Stefano Dissegna and Francesco Logozzo and Francesco
Ranzato",
title = "Tracing compilation by abstract interpretation",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "47--59",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535866",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Tracing just-in-time compilation is a popular
compilation schema for the efficient implementation of
dynamic languages, which is commonly used for
JavaScript, Python, and PHP. It relies on two key
ideas. First, it monitors the execution of the program
to detect so-called hot paths, i.e., the most
frequently executed paths. Then, it uses some store
information available at runtime to optimize hot paths.
The result is a residual program where the optimized
hot paths are guarded by sufficient conditions ensuring
the equivalence of the optimized path and the original
program. The residual program is persistently mutated
during its execution, e.g., to add new optimized paths
or to merge existing paths. Tracing compilation is thus
fundamentally different from traditional static
compilation. Nevertheless, despite the remarkable
practical success of tracing compilation, very little
is known about its theoretical foundations. We
formalize tracing compilation of programs using
abstract interpretation. The monitoring (viz., hot path
detection) phase corresponds to an abstraction of the
trace semantics that captures the most frequent
occurrences of sequences of program points together
with an abstraction of their corresponding stores,
e.g., a type environment. The optimization (viz.,
residual program generation) phase corresponds to a
transform of the original program that preserves its
trace semantics up to a given observation as modeled by
some abstraction. We provide a generic framework to
express dynamic optimizations and to prove them
correct. We instantiate it to prove the correctness of
dynamic type specialization. We show that our framework
is more general than a recent model of tracing
compilation introduced in POPL~2011 by Guo and Palsberg
(based on operational bisimulations). In our model we
can naturally express hot path reentrance and common
optimizations like dead-store elimination, which are
either excluded or unsound in Guo and Palsberg's
framework.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ramsay:2014:TDA,
author = "Steven J. Ramsay and Robin P. Neatherway and C.-H.
Luke Ong",
title = "A type-directed abstraction refinement approach to
higher-order model checking",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "61--72",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535873",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "The trivial-automaton model checking problem for
higher-order recursion schemes has become a widely
studied object in connection with the automatic
verification of higher-order programs. The problem is
formidably hard: despite considerable progress in
recent years, no decision procedures have been
demonstrated to scale robustly to recursion schemes
comprising more than a few hundred rewrite rules. We
present a new, fixed-parameter polynomial time
algorithm, based on a novel, type directed form of
abstraction refinement in which behaviours of a scheme
are distinguished by the abstraction according to the
intersection types that they inhabit (the properties
that they satisfy). Unlike other intersection type
approaches, our algorithm reasons both about acceptance
by the property automaton and acceptance by its dual,
simultaneously, in order to minimize the amount of work
done by converging on the solution to a problem
instance from both sides. We have constructed Preface,
a prototype implementation of the algorithm, and
assembled an extensive body of evidence to demonstrate
empirically that the algorithm readily scales to
recursion schemes of several thousand rules, well
beyond the capabilities of current state-of-the-art
higher-order model checkers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Coughlin:2014:FTA,
author = "Devin Coughlin and Bor-Yuh Evan Chang",
title = "Fissile type analysis: modular checking of almost
everywhere invariants",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "73--85",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535855",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We present a generic analysis approach to the
imperative relationship update problem, in which
destructive updates temporarily violate a global
invariant of interest. Such invariants can be
conveniently and concisely specified with dependent
refinement types, which are efficient to check
flow-insensitively. Unfortunately, while traditional
flow-insensitive type checking is fast, it is
inapplicable when the desired invariants can be
temporarily broken. To overcome this limitation, past
works have directly ratcheted up the complexity of the
type analysis and associated type invariants, leading
to inefficient analysis and verbose specifications. In
contrast, we propose a generic lifting of modular
refinement type analyses with a symbolic analysis to
efficiently and effectively check concise invariants
that hold almost everywhere. The result is an
efficient, highly modular flow-insensitive type
analysis to optimistically check the preservation of
global relationship invariants that can fall back to a
precise, disjunctive symbolic analysis when the
optimistic assumption is violated. This technique
permits programmers to temporarily break and then
re-establish relationship invariants --- a flexibility
that is crucial for checking relationships in
real-world, imperative languages. A significant
challenge is selectively violating the global type
consistency invariant over heap locations, which we
achieve via almost type-consistent heaps. To evaluate
our approach, we have encoded the problem of verifying
the safety of reflective method calls in dynamic
languages as a refinement type checking problem. Our
analysis is capable of validating reflective call
safety at interactive speeds on commonly-used
Objective-C libraries and applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bodin:2014:TMJ,
author = "Martin Bodin and Arthur Chargueraud and Daniele
Filaretti and Philippa Gardner and Sergio Maffeis and
Daiva Naudziuniene and Alan Schmitt and Gareth Smith",
title = "A trusted mechanised {JavaScript} specification",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "87--100",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535876",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "JavaScript is the most widely used web language for
client-side applications. Whilst the development of
JavaScript was initially just led by implementation,
there is now increasing momentum behind the ECMA
standardisation process. The time is ripe for a formal,
mechanised specification of JavaScript, to clarify
ambiguities in the ECMA standards, to serve as a
trusted reference for high-level language compilation
and JavaScript implementations, and to provide a
platform for high-assurance proofs of language
properties. We present JSCert, a formalisation of the
current ECMA standard in the Coq proof assistant, and
JSRef, a reference interpreter for JavaScript extracted
from Coq to OCaml. We give a Coq proof that JSRef is
correct with respect to JSCert and assess JSRef using
test262, the ECMA conformance test suite. Our
methodology ensures that JSCert is a comparatively
accurate formulation of the English standard, which
will only improve as time goes on. We have demonstrated
that modern techniques of mechanised specification can
handle the complexity of JavaScript.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Krebbers:2014:OAS,
author = "Robbert Krebbers",
title = "An operational and axiomatic semantics for
non-determinism and sequence points in {C}",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "101--112",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535878",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "The C11 standard of the C programming language does
not specify the execution order of expressions.
Besides, to make more effective optimizations possible
(e.g., delaying of side-effects and interleaving), it
gives compilers in certain cases the freedom to use
even more behaviors than just those of all execution
orders. Widely used C compilers actually exploit this
freedom given by the C standard for optimizations, so
it should be taken seriously in formal verification.
This paper presents an operational and axiomatic
semantics (based on separation logic) for
non-determinism and sequence points in C. We prove
soundness of our axiomatic semantics with respect to
our operational semantics. This proof has been fully
formalized using the Coq proof assistant.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Anderson:2014:NSF,
author = "Carolyn Jane Anderson and Nate Foster and Arjun Guha
and Jean-Baptiste Jeannin and Dexter Kozen and Cole
Schlesinger and David Walker",
title = "{NetKAT}: semantic foundations for networks",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "113--126",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535862",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Recent years have seen growing interest in high-level
languages for programming networks. But the design of
these languages has been largely ad hoc, driven more by
the needs of applications and the capabilities of
network hardware than by foundational principles. The
lack of a semantic foundation has left language
designers with little guidance in determining how to
incorporate new features, and programmers without a
means to reason precisely about their code. This paper
presents NetKAT, a new network programming language
that is based on a solid mathematical foundation and
comes equipped with a sound and complete equational
theory. We describe the design of NetKAT, including
primitives for filtering, modifying, and transmitting
packets; union and sequential composition operators;
and a Kleene star operator that iterates programs. We
show that NetKAT is an instance of a canonical and
well-studied mathematical structure called a Kleene
algebra with tests (KAT) and prove that its equational
theory is sound and complete with respect to its
denotational semantics. Finally, we present practical
applications of the equational theory including
syntactic techniques for checking reachability, proving
non-interference properties that ensure isolation
between programs, and establishing the correctness of
compilation algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
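A hedged sketch of the entry above: NetKAT's syntax as a Haskell data
type with a naive set-of-packets interpreter. This drops the dup
primitive and the packet histories on which the paper's denotational
semantics is actually defined, and the field and value types are
simplifying assumptions:

    import qualified Data.Map as M

    type Field  = String
    type Value  = Int
    type Packet = M.Map Field Value

    data Pred
      = PTrue | PFalse
      | Test Field Value
      | PAnd Pred Pred
      | POr Pred Pred
      | PNeg Pred

    data Pol
      = Filter Pred       -- keep packets satisfying a predicate
      | Mod Field Value   -- modify a header field
      | Union Pol Pol     -- parallel composition (+)
      | Seq Pol Pol       -- sequential composition (;)
      | Star Pol          -- Kleene iteration

    evalPred :: Pred -> Packet -> Bool
    evalPred PTrue      _  = True
    evalPred PFalse     _  = False
    evalPred (Test f v) pk = M.lookup f pk == Just v
    evalPred (PAnd a b) pk = evalPred a pk && evalPred b pk
    evalPred (POr a b)  pk = evalPred a pk || evalPred b pk
    evalPred (PNeg a)   pk = not (evalPred a pk)

    -- a policy maps a packet to the set of packets it may produce
    evalPol :: Pol -> Packet -> [Packet]
    evalPol (Filter p)  pk = [pk | evalPred p pk]
    evalPol (Mod f v)   pk = [M.insert f v pk]
    evalPol (Union p q) pk = evalPol p pk ++ evalPol q pk
    evalPol (Seq p q)   pk = concatMap (evalPol q) (evalPol p pk)
    evalPol (Star p)    pk = go [pk]
      where
        -- least fixed point by saturation; terminates when the set
        -- of reachable packets is finite
        go seen =
          let new = [ pk'' | pk' <- seen, pk'' <- evalPol p pk'
                           , pk'' `notElem` seen ]
          in if null new then seen else go (seen ++ new)

    main :: IO ()
    main = print (evalPol (Seq (Filter (Test "port" 1)) (Mod "port" 2))
                          (M.fromList [("port", 1)]))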
@Article{Sharma:2014:BVT,
author = "Rahul Sharma and Aditya V. Nori and Alex Aiken",
title = "Bias-variance tradeoffs in program analysis",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "127--137",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535853",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "It is often the case that increasing the precision of
a program analysis leads to worse results. It is our
thesis that this phenomenon is the result of
fundamental limits on the ability to use precise
abstract domains as the basis for inferring strong
invariants of programs. We show that bias-variance
tradeoffs, an idea from learning theory, can be used to
explain why more precise abstractions do not
necessarily lead to better results and also provide
practical techniques for coping with such limitations.
Learning theory captures precision using a
combinatorial quantity called the VC dimension. We
compute the VC dimension for different abstractions and
report on its usefulness as a precision metric for
program analyses. We evaluate cross validation, a
technique for addressing bias-variance tradeoffs, on an
industrial strength program verification tool called
YOGI. The tool produced using cross validation has
significantly better running time, finds new defects,
and has fewer time-outs than the current production
version. Finally, we make some recommendations for
tackling bias-variance tradeoffs in program analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{DSilva:2014:AS,
author = "Vijay D'Silva and Leopold Haller and Daniel Kroening",
title = "Abstract satisfaction",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "139--150",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535868",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "This article introduces an abstract interpretation
framework that codifies the operations in SAT and SMT
solvers in terms of lattices, transformers and fixed
points. We develop the idea that a formula denotes a
set of models in a universe of structures. This set of
models has characterizations as fixed points of
deduction, abduction and quantification transformers. A
wide range of satisfiability procedures can be
understood as computing and refining approximations of
such fixed points. These include procedures in the DPLL
family, those for preprocessing and inprocessing in SAT
solvers, decision procedures for equality logics, weak
arithmetics, and procedures for approximate
quantification. Our framework provides a unified,
mathematical basis for studying and combining program
analysis and satisfiability procedures. A practical
benefit of our work is a new, logic-agnostic
architecture for implementing solvers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Farzan:2014:PC,
author = "Azadeh Farzan and Zachary Kincaid and Andreas
Podelski",
title = "Proofs that count",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "151--164",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535885",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Counting arguments are among the most basic proof
methods in mathematics. Within the field of formal
verification, they are useful for reasoning about
programs with infinite control, such as programs with
an unbounded number of threads, or (concurrent)
programs with recursive procedures. While counting
arguments are common in informal, hand-written proofs
of such programs, there are no fully automated
techniques to construct counting arguments. The key
questions involved in automating counting arguments
are: how to decide what should be counted, and how to
decide when a counting argument is valid? In this
paper, we present a technique for automatically
constructing and checking counting arguments, which
includes novel solutions to these questions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{deAmorim:2014:VIF,
author = "Arthur Azevedo de Amorim and Nathan Collins and
Andr{\'e} DeHon and Delphine Demange and Catalin Hritcu
and David Pichardie and Benjamin C. Pierce and Randy
Pollack and Andrew Tolmach",
title = "A verified information-flow architecture",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "165--178",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535839",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "SAFE is a clean-slate design for a highly secure
computer system, with pervasive mechanisms for tracking
and limiting information flows. At the lowest level,
the SAFE hardware supports fine-grained programmable
tags, with efficient and flexible propagation and
combination of tags as instructions are executed. The
operating system virtualizes these generic facilities
to present an information-flow abstract machine that
allows user programs to label sensitive data with rich
confidentiality policies. We present a formal,
machine-checked model of the key hardware and software
mechanisms used to control information flow in SAFE and
an end-to-end proof of noninterference for this
model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kumar:2014:CVI,
author = "Ramana Kumar and Magnus O. Myreen and Michael Norrish
and Scott Owens",
title = "{CakeML}: a verified implementation of {ML}",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "179--191",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535841",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We have developed and mechanically verified an ML
system called CakeML, which supports a substantial
subset of Standard ML. CakeML is implemented as an
interactive read-eval-print loop (REPL) in x86-64
machine code. Our correctness theorem ensures that this
REPL implementation prints only those results permitted
by the semantics of CakeML. Our verification effort
touches on a breadth of topics including lexing,
parsing, type checking, incremental and dynamic
compilation, garbage collection, arbitrary-precision
arithmetic, and compiler bootstrapping. Our
contributions are twofold. The first is simply in
building a system that is end-to-end verified,
demonstrating that each piece of such a verification
effort can in practice be composed with the others, and
ensuring that none of the pieces rely on any
over-simplifying assumptions. The second is developing
novel approaches to some of the more challenging
aspects of the verification. In particular, our
formally verified compiler can bootstrap itself: we
apply the verified compiler to itself to produce a
verified machine-code implementation of the compiler.
Additionally, our compiler proof handles diverging
input programs with a lightweight approach based on
logical timeout exceptions. The entire development was
carried out in the HOL4 theorem prover.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Barthe:2014:PRV,
author = "Gilles Barthe and C{\'e}dric Fournet and Benjamin
Gr{\'e}goire and Pierre-Yves Strub and Nikhil Swamy and
Santiago Zanella-B{\'e}guelin",
title = "Probabilistic relational verification for
cryptographic implementations",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "193--205",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535847",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Relational program logics have been used for
mechanizing formal proofs of various cryptographic
constructions. With an eye towards scaling these
successes towards end-to-end security proofs for
implementations of distributed systems, we present RF*,
a relational extension of F*, a general-purpose
higher-order stateful programming language with a
verification system based on refinement types. The
distinguishing feature of F* is a relational Hoare
logic for a higher-order, stateful, probabilistic
language. Through careful language design, we adapt the
F* typechecker to generate both classic and relational
verification conditions, and to automatically discharge
their proofs using an SMT solver. Thus, we are able to
benefit from the existing features of F*, including its
abstraction facilities for modular reasoning about
program fragments. We evaluate RF* experimentally by
programming a series of cryptographic constructions and
protocols, and by verifying their security properties,
ranging from information flow to unlinkability,
integrity, and privacy. Moreover, we validate the
design of RF* by formalizing in Coq a core
probabilistic \lambda calculus and a relational
refinement type system and proving the soundness of the
latter against a denotational semantics of the
probabilistic \lambda calculus.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chaudhuri:2014:BBQ,
author = "Swarat Chaudhuri and Martin Clochard and Armando
Solar-Lezama",
title = "Bridging boolean and quantitative synthesis using
smoothed proof search",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "207--220",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535859",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We present a new technique for parameter synthesis
under boolean and quantitative objectives. The input to
the technique is a ``sketch'' --- a program with
missing numerical parameters --- and a probabilistic
assumption about the program's inputs. The goal is to
automatically synthesize values for the parameters such
that the resulting program satisfies: (1) a {boolean
specification}, which states that the program must meet
certain assertions, and (2) a {quantitative
specification}, which assigns a real valued rating to
every program and which the synthesizer is expected to
optimize. Our method --- called smoothed proof search
--- reduces this task to a sequence of unconstrained
smooth optimization problems that are then solved
numerically. By iteratively solving these problems, we
obtain parameter values that get closer and closer to
meeting the boolean specification; at the limit, we
obtain values that provably meet the specification. The
approximations are computed using a new notion of
smoothing for program abstractions, where an abstract
transformer is approximated by a function that is
continuous according to a metric over abstract states.
We present a prototype implementation of our synthesis
procedure, and experimental results on two benchmarks
from the embedded control domain. The experiments
demonstrate the benefits of smoothed proof search over
an approach that does not meet the boolean and
quantitative synthesis goals simultaneously.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Beyene:2014:CBA,
author = "Tewodros Beyene and Swarat Chaudhuri and Corneliu
Popeea and Andrey Rybalchenko",
title = "A constraint-based approach to solving games on
infinite graphs",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "221--233",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535860",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We present a constraint-based approach to computing
winning strategies in two-player graph games over the
state space of infinite-state programs. Such games have
numerous applications in program verification and
synthesis, including the synthesis of infinite-state
reactive programs and branching-time verification of
infinite-state programs. Our method handles games with
winning conditions given by safety, reachability, and
general Linear Temporal Logic (LTL) properties. For
each property class, we give a deductive proof rule
that --- provided a symbolic representation of the game
players --- describes a winning strategy for a
particular player. Our rules are sound and relatively
complete. We show that these rules can be automated by
using an off-the-shelf Horn constraint solver that
supports existential quantification in clause heads.
The practical promise of the rules is demonstrated
through several case studies, including a challenging
``Cinderella-Stepmother game'' that allows infinite
alternation of discrete and continuous choices by two
players, as well as examples derived from prior work on
program repair and synthesis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Darulova:2014:SCR,
author = "Eva Darulova and Viktor Kuncak",
title = "Sound compilation of reals",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "235--248",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535874",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Writing accurate numerical software is hard because of
many sources of unavoidable uncertainties, including
finite numerical precision of implementations. We
present a programming model where the user writes a
program in a real-valued implementation and
specification language that explicitly includes
different types of uncertainties. We then present a
compilation algorithm that generates a finite-precision
implementation that is guaranteed to meet the desired
precision with respect to real numbers. Our compilation
performs a number of verification steps for different
candidate precisions. It generates verification
conditions that treat all sources of uncertainties in a
unified way and encode reasoning about finite-precision
roundoff errors into reasoning about real numbers. Such
verification conditions can be used as a standardized
format for verifying the precision and the correctness
of numerical programs. Due to their non-linear nature,
precise reasoning about these verification conditions
remains difficult and cannot be handled using
state-of-the art SMT solvers alone. We therefore
propose a new procedure that combines exact SMT solving
over reals with approximate and sound affine and
interval arithmetic. We show that this approach
overcomes scalability limitations of SMT solvers while
providing improved precision over affine and interval
arithmetic. Our implementation gives promising results
on several numerical models, including dynamical
systems, transcendental functions, and controller
implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Huet:2014:YRD,
author = "G{\'e}rard Huet and Hugo Herbelin",
title = "30 years of research and development around {Coq}",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "249--249",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2537848",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Brookes:2014:ER,
author = "Stephen Brookes and Peter W. O'Hearn and Uday Reddy",
title = "The essence of {Reynolds}",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "251--255",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2537851",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract =     "John Reynolds (1935--2013) was a pioneer of programming
languages research. In this paper we pay tribute to the
man, his ideas, and his influence.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kuper:2014:FAW,
author = "Lindsey Kuper and Aaron Turon and Neelakantan R.
Krishnaswami and Ryan R. Newton",
title = "Freeze after writing: quasi-deterministic parallel
programming with {LVars}",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "257--270",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535842",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Deterministic-by-construction parallel programming
models offer the advantages of parallel speedup while
avoiding the nondeterministic, hard-to-reproduce bugs
that plague fully concurrent code. A principled
approach to deterministic-by-construction parallel
programming with shared state is offered by LVars:
shared memory locations whose semantics are defined in
terms of an application-specific lattice. Writes to an
LVar take the least upper bound of the old and new
values with respect to the lattice, while reads from an
LVar can observe only that its contents have crossed a
specified threshold in the lattice. Although it
guarantees determinism, this interface is quite
limited. We extend LVars in two ways. First, we add the
ability to ``freeze'' and then read the contents of an
LVar directly. Second, we add the ability to attach
event handlers to an LVar, triggering a callback when
the LVar's value changes. Together, handlers and
freezing enable an expressive and useful style of
parallel programming. We prove that in a language where
communication takes place through these extended LVars,
programs are at worst quasi-deterministic: on every
run, they either produce the same answer or raise an
error. We demonstrate the viability of our approach by
implementing a library for Haskell supporting a variety
of LVar-based data structures, together with a case
study that illustrates the programming model and yields
promising parallel speedup.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Burckhardt:2014:RDT,
author = "Sebastian Burckhardt and Alexey Gotsman and Hongseok
Yang and Marek Zawirski",
title = "Replicated data types: specification, verification,
optimality",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "271--284",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535848",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Geographically distributed systems often rely on
replicated eventually consistent data stores to achieve
availability and performance. To resolve conflicting
updates at different replicas, researchers and
practitioners have proposed specialized consistency
protocols, called replicated data types, that implement
objects such as registers, counters, sets or lists.
Reasoning about replicated data types has however not
been on par with comparable work on abstract data types
and concurrent data types, lacking specifications,
correctness proofs, and optimality results. To fill in
this gap, we propose a framework for specifying
replicated data types using relations over events and
verifying their implementations using replication-aware
simulations. We apply it to 7 existing implementations
of 4 data types with nontrivial conflict-resolution
strategies and optimizations (last-writer-wins
register, counter, multi-value register and
observed-remove set). We also present a novel technique
for obtaining lower bounds on the worst-case space
overhead of data type implementations and use it to
prove optimality of 4 implementations. Finally, we show
how to specify consistency of replicated stores with
multiple objects axiomatically, in analogy to prior
work on weak memory models. Overall, our work provides
foundational reasoning tools to support research on
replicated eventually consistent stores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bouajjani:2014:VEC,
author = "Ahmed Bouajjani and Constantin Enea and Jad Hamza",
title = "Verifying eventual consistency of optimistic
replication systems",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "285--296",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535877",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We address the verification problem of eventual
consistency of optimistic replication systems. Such
systems are typically used to implement distributed
data structures over large scale networks. We introduce
a formal definition of eventual consistency that
applies to a wide class of existing implementations,
including the ones using speculative executions. Then,
we reduce the problem of checking eventual consistency
to reachability and model checking problems. This
reduction enables the use of existing verification
tools for message-passing programs in the context of
verifying optimistic replication systems. Furthermore,
we derive from these reductions decision procedures for
checking eventual consistency of systems implemented as
finite-state programs communicating through unbounded
unordered channels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{DalLago:2014:CEH,
author = "Ugo {Dal Lago} and Davide Sangiorgi and Michele
Alberti",
title = "On coinductive equivalences for higher-order
probabilistic functional programs",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "297--308",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535872",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We study bisimulation and context equivalence in a
probabilistic lambda-calculus. The contributions of
this paper are threefold. Firstly we show a technique
for proving congruence of probabilistic applicative
bisimilarity. While the technique follows Howe's
method, some of the technicalities are quite different,
relying on non-trivial ``disentangling'' properties for
sets of real numbers. Secondly we show that, while
bisimilarity is in general strictly finer than context
equivalence, coincidence between the two relations is
attained on pure lambda-terms. The resulting equality
is that induced by Levy--Longo trees, generally
accepted as the finest extensional equivalence on pure
lambda-terms under a lazy regime. Finally, we derive a
coinductive characterisation of context equivalence on
the whole probabilistic language, via an extension in
which terms akin to distributions may appear in redex
position. Another motivation for the extension is that
its operational semantics allows us to experiment with
a different congruence technique, namely that of
logical bisimilarity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ehrhard:2014:PCS,
author = "Thomas Ehrhard and Christine Tasson and Michele
Pagani",
title = "Probabilistic coherence spaces are fully abstract for
probabilistic {PCF}",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "309--320",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535865",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Probabilistic coherence spaces (PCoh) yield a
semantics of higher-order probabilistic computation,
interpreting types as convex sets and programs as power
series. We prove that the equality of interpretations
in PCoh characterizes the operational
indistinguishability of programs in PCF with a random
primitive. This is the first result of full abstraction
for a semantics of probabilistic PCF. The key
ingredient relies on the regularity of power series.
Along the way to the theorem, we design a weighted
intersection type assignment system giving a logical
presentation of PCoh.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gordon:2014:TSD,
author = "Andrew D. Gordon and Thore Graepel and Nicolas Rolland
and Claudio Russo and Johannes Borgstrom and John
Guiver",
title = "{Tabular}: a schema-driven probabilistic programming
language",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "321--334",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535850",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We propose a new kind of probabilistic programming
language for machine learning. We write programs simply
by annotating existing relational schemas with
probabilistic model expressions. We describe a detailed
design of our language, Tabular, complete with formal
semantics and type system. A rich series of examples
illustrates the expressiveness of Tabular. We report an
implementation, and show evidence of the succinctness
of our notation relative to current best practice.
Finally, we describe and verify a transformation of
Tabular schemas so as to predict missing values in a
concrete database. The ability to query for missing
values provides a uniform interface to a wide variety
of tasks, including classification, clustering,
recommendation, and ranking.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sergey:2014:MHO,
author = "Ilya Sergey and Dimitrios Vytiniotis and Simon Peyton
Jones",
title = "Modular, higher-order cardinality analysis in theory
and practice",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "335--347",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535861",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Since the mid '80s, compiler writers for functional
languages (especially lazy ones) have been writing
papers about identifying and exploiting thunks and
lambdas that are used only once. However it has proved
difficult to achieve both power and simplicity in
practice. We describe a new, modular analysis for a
higher-order language, which is both simple and
effective, and present measurements of its use in a
full-scale, state of the art optimising compiler. The
analysis finds many single-entry thunks and one-shot
lambdas and enables a number of program
optimisations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chang:2014:PL,
author = "Stephen Chang and Matthias Felleisen",
title = "Profiling for laziness",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "349--360",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535887",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "While many programmers appreciate the benefits of lazy
programming at an abstract level, determining which
parts of a concrete program to evaluate lazily poses a
significant challenge for most of them. Over the past
thirty years, experts have published numerous papers on
the problem, but developing this level of expertise
requires a significant amount of experience. We present
a profiling-based technique that captures and automates
this expertise for the insertion of laziness
annotations into strict programs. To make this idea
precise, we show how to equip a formal semantics with a
metric that measures waste in an evaluation. Then we
explain how to implement this metric as a dynamic
profiling tool that suggests where to insert laziness
into a program. Finally, we present evidence that our
profiler's suggestions either match or improve on an
expert's use of laziness in a range of real-world
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cave:2014:FRP,
author = "Andrew Cave and Francisco Ferreira and Prakash
Panangaden and Brigitte Pientka",
title = "Fair reactive programming",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "361--372",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535881",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Functional Reactive Programming (FRP) models reactive
systems with events and signals, which have previously
been observed to correspond to the ``eventually'' and
``always'' modalities of linear temporal logic (LTL).
In this paper, we define a constructive variant of LTL
with least fixed point and greatest fixed point
operators in the spirit of the modal mu-calculus, and
give it a proofs-as-programs interpretation as a
foundational calculus for reactive programs. Previous
work emphasized the propositions-as-types part of the
correspondence between LTL and FRP; here we emphasize
the proofs-as-programs part by employing structural
proof theory. We show that the type system is
expressive enough to enforce liveness properties such
as the fairness of schedulers and the eventual delivery
of results. We illustrate programming in this calculus
using (co)iteration operators. We prove type
preservation of our operational semantics, which
guarantees that our programs are causal. We also give a
proof of strong normalization which provides
justification that our programs are productive and that
they satisfy liveness properties derived from their
types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Abdulla:2014:ODP,
author = "Parosh Abdulla and Stavros Aronis and Bengt Jonsson
and Konstantinos Sagonas",
title = "Optimal dynamic partial order reduction",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "373--384",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535845",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Stateless model checking is a powerful technique for
program verification, which however suffers from an
exponential growth in the number of explored
executions. A successful technique for reducing this
number, while still maintaining complete coverage, is
Dynamic Partial Order Reduction (DPOR). We present a
new DPOR algorithm, which is the first to be provably
optimal in that it always explores the minimal number
of executions. It is based on a novel class of sets,
called source sets, which replace the role of
persistent sets in previous algorithms. First, we show
how to modify an existing DPOR algorithm to work with
source sets, resulting in an efficient and simple to
implement algorithm. Second, we extend this algorithm
with a novel mechanism, called wakeup trees, that
allows us to achieve optimality. We have implemented both
algorithms in a stateless model checking tool for
Erlang programs. Experiments show that source sets
significantly increase the performance and that wakeup
trees incur only a small overhead in both time and
space.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Itzhaky:2014:MRA,
author = "Shachar Itzhaky and Anindya Banerjee and Neil Immerman
and Ori Lahav and Aleksandar Nanevski and Mooly Sagiv",
title = "Modular reasoning about heap paths via effectively
propositional formulas",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "385--396",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535854",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "First order logic with transitive closure, and
separation logic enable elegant interactive
verification of heap-manipulating programs. However,
undecidability results and high asymptotic complexity of
checking validity preclude complete automatic
verification of such programs, even when loop
invariants and procedure contracts are specified as
formulas in these logics. This paper tackles the
problem of procedure-modular verification of
reachability properties of heap-manipulating programs
using efficient decision procedures that are complete:
that is, a SAT solver must generate a counterexample
whenever a program does not satisfy its specification.
By (a) requiring that each procedure modify a fixed set
of heap partitions and create a bounded amount of heap
sharing, and (b) restricting program contracts and loop
invariants to use only deterministic paths in the heap,
we show that heap reachability updates can be described
in a simple manner. The restrictions force program
specifications and verification conditions to lie
within a fragment of first-order logic with transitive
closure that is reducible to effectively propositional
logic, and hence facilitate sound, complete and
efficient verification. We implemented a tool atop Z3
and report on preliminary experiments that establish
the correctness of several programs that manipulate
linked data structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chong:2014:SCA,
author = "Nathan Chong and Alastair F. Donaldson and Jeroen
Ketema",
title = "A sound and complete abstraction for reasoning about
parallel prefix sums",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "397--409",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535882",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Prefix sums are key building blocks in the
implementation of many concurrent software
applications, and recently much work has gone into
efficiently implementing prefix sums to run on
massively parallel graphics processing units (GPUs).
Because they lie at the heart of many GPU-accelerated
applications, the correctness of prefix sum
implementations is of prime importance. We introduce a
novel abstraction, the interval of summations, that
allows scalable reasoning about implementations of
prefix sums. We present this abstraction as a monoid,
and prove a soundness and completeness result showing
that a generic sequential prefix sum implementation is
correct for an array of length $n$ if and only if it
computes the correct result for a specific test case
when instantiated with the interval of summations
monoid. This allows correctness to be established by
running a single test where the input and result
require $O(n \lg n)$ space. This improves upon an
existing result by Sheeran where the input requires
$O(n \lg n)$ space and the result $O(n^2 \lg n)$ space,
and is more feasible for large $n$ than a method by
Voigtlaender that uses $O(n)$ space for the input and
result but requires running $O(n^2)$ tests.
extend our abstraction and results to the context of
data-parallel programs, developing an automated
verification method for GPU implementations of prefix
sums. Our method uses static verification to prove that
a generic prefix sum implementation is data race-free,
after which functional correctness of the
implementation can be determined by running a single
test case under the interval of summations abstraction.
We present an experimental evaluation using four
different prefix sum algorithms, showing that our
method is highly automatic, scales to large thread
counts, and significantly outperforms Voigtlaender's
method when applied to large arrays.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Miller:2014:ADS,
author = "Andrew Miller and Michael Hicks and Jonathan Katz and
Elaine Shi",
title = "Authenticated data structures, generically",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "411--423",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535851",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "An authenticated data structure (ADS) is a data
structure whose operations can be carried out by an
untrusted prover, the results of which a verifier can
efficiently check as authentic. This is done by having
the prover produce a compact proof that the verifier
can check along with each operation's result. ADSs thus
support outsourcing data maintenance and processing
tasks to untrusted servers without loss of integrity.
Past work on ADSs has focused on particular data
structures (or limited classes of data structures), one
at a time, often with support only for particular
operations. This paper presents a generic method, using
a simple extension to an ML-like functional programming
language we call \lambda o (lambda-auth), with which
one can program authenticated operations over any data
structure defined by standard type constructors,
including recursive types, sums, and products. The
programmer writes the data structure largely as usual
and it is compiled to code to be run by the prover and
verifier. Using a formalization of \lambda o we prove
that all well-typed \lambda o programs result in code
that is secure under the standard cryptographic
assumption of collision-resistant hash functions. We
have implemented \lambda o as an extension to the OCaml
compiler, and have used it to produce authenticated
versions of many interesting data structures including
binary search trees, red-black+ trees, skip lists, and
more. Performance experiments show that our approach is
efficient, giving up little compared to the
hand-optimized data structures developed previously.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Swamy:2014:GTE,
author = "Nikhil Swamy and Cedric Fournet and Aseem Rastogi and
Karthikeyan Bhargavan and Juan Chen and Pierre-Yves
Strub and Gavin Bierman",
title = "Gradual typing embedded securely in {JavaScript}",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "425--437",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535889",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "JavaScript's flexible semantics makes writing correct
code hard and writing secure code extremely difficult.
To address the former problem, various forms of gradual
typing have been proposed, such as Closure and
TypeScript. However, supporting all common programming
idioms is not easy; for example, TypeScript
deliberately gives up type soundness for programming
convenience. In this paper, we propose a gradual type
system and implementation techniques that provide
important safety and security guarantees. We present
TS\#, a gradual type system and source-to-source
compiler for JavaScript. In contrast to prior gradual
type systems, TS\# features full runtime reflection
over three kinds of types: (1) simple types for
higher-order functions, recursive datatypes and
dictionary-based extensible records; (2) the type any,
for dynamically type-safe TS\# expressions; and (3) the
type un, for untrusted, potentially malicious
JavaScript contexts in which TS\# is embedded. After
type-checking, the compiler instruments the program
with various checks to ensure the type safety of TS\#
despite its interactions with arbitrary JavaScript
contexts, which are free to use eval, stack walks,
prototype customizations, and other offensive features.
The proof of our main theorem employs a form of
type-preserving compilation, wherein we prove all the
runtime invariants of the translation of TS\# to
JavaScript by showing that translated programs are
well-typed in JS\# , a previously proposed dependently
typed language for proving functional correctness of
JavaScript programs. We describe a prototype compiler,
a secure runtime, and sample applications for TS\#. Our
examples illustrate how web security patterns that
developers currently program in JavaScript (with much
difficulty and still with dubious results) can instead
be programmed naturally in TS\#, retaining a flavor of
idiomatic JavaScript, while providing strong safety
guarantees by virtue of typing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Long:2014:SIF,
author = "Fan Long and Stelios Sidiroglou-Douskos and Deokhwan
Kim and Martin Rinard",
title = "Sound input filter generation for integer overflow
errors",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "439--452",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535888",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We present a system, SIFT, for generating input
filters that nullify integer overflow errors associated
with critical program sites such as memory allocation
or block copy sites. SIFT uses a static program
analysis to generate filters that discard inputs that
may trigger integer overflow errors in the computations
of the sizes of allocated memory blocks or the number
of copied bytes in block copy operations. Unlike all
previous techniques of which we are aware, SIFT is
sound --- if an input passes the filter, it will not
trigger an integer overflow error at any analyzed site.
Our results show that SIFT successfully analyzes (and
therefore generates sound input filters for) 56 out of
58 memory allocation and block memory copy sites in
analyzed input processing modules from five
applications (VLC, Dillo, Swfdec, Swftools, and GIMP).
These nullified errors include six known integer
overflow vulnerabilities. Our results also show that
applying these filters to 62895 real-world inputs
produces no false positives. The analysis and filter
generation times are all less than a second.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Brotherston:2014:PCS,
author = "James Brotherston and Jules Villard",
title = "Parametric completeness for separation theories",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "453--464",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535844",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "In this paper, we close the logical gap between
provability in the logic BBI, which is the
propositional basis for separation logic, and validity
in an intended class of separation models, as employed
in applications of separation logic such as program
verification. An intended class of separation models is
usually specified by a collection of axioms describing
the specific model properties that are expected to
hold, which we call a separation theory. Our main
contributions are as follows. First, we show that
several typical properties of separation theories are
not definable in BBI. Second, we show that these
properties become definable in a suitable hybrid
extension of BBI, obtained by adding a theory of naming
to BBI in the same way that hybrid logic extends normal
modal logic. The binder-free extension captures most of
the properties we consider, and the full extension
HyBBI($\forall$) with the usual $\forall$ binder of hybrid logic covers
all these properties. Third, we present an axiomatic
proof system for our hybrid logic whose extension with
any set of ``pure'' axioms is sound and complete with
respect to the models satisfying those axioms. As a
corollary of this general result, we obtain, in a
parametric manner, a sound and complete axiomatic proof
system for any separation theory from our considered
class. To the best of our knowledge, this class
includes all separation theories appearing in the
published literature.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hou:2014:PSP,
author = "Zh{\'e} H{\'o}u and Ranald Clouston and Rajeev
Gor{\'e} and Alwen Tiu",
title = "Proof search for propositional abstract separation
logics via labelled sequents",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "465--476",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535864",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Abstract separation logics are a family of extensions
of Hoare logic for reasoning about programs that mutate
memory. These logics are ``abstract'' because they are
independent of any particular concrete memory model.
Their assertion languages, called propositional
abstract separation logics, extend the logic of
(Boolean) Bunched Implications (BBI) in various ways.
We develop a modular proof theory for various
propositional abstract separation logics using cut-free
labelled sequent calculi. We first extend the cut-free
labelled sequent calculus for BBI of Hou et al. to
handle Calcagno et al.'s original logic of separation
algebras by adding sound rules for partial-determinism
and cancellativity, while preserving cut-elimination.
We prove the completeness of our calculus via a sound
intermediate calculus that enables us to construct
counter-models from the failure to find a proof. We
then capture other propositional abstract separation
logics by adding sound rules for indivisible unit and
disjointness, while maintaining completeness and
cut-elimination. We present a theorem prover based on
our labelled calculus for these logics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lee:2014:PSS,
author = "Wonyeol Lee and Sungwoo Park",
title = "A proof system for separation logic with magic wand",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "477--490",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535871",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Separation logic is an extension of Hoare logic which
is acknowledged as an enabling technology for
large-scale program verification. It features two new
logical connectives, separating conjunction and
separating implication, but most of the applications of
separation logic have exploited only separating
conjunction without considering separating implication.
Nevertheless the power of separating implication has
been well recognized and there is a growing interest in
its use for program verification. This paper develops a
proof system for full separation logic which supports
not only separating conjunction but also separating
implication. The proof system is developed in the style
of sequent calculus and satisfies the admissibility of
cut. The key challenge in the development is to devise
a set of inference rules for manipulating heap
structures that ensure the completeness of the proof
system with respect to separation logic. We show that
our proof of completeness directly translates to a
proof search strategy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Atkey:2014:PCL,
author = "Robert Atkey",
title = "From parametricity to conservation laws, via
{Noether}'s theorem",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "491--502",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535867",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Invariance is of paramount importance in programming
languages and in physics. In programming languages,
John Reynolds' theory of relational parametricity
demonstrates that parametric polymorphic programs are
invariant under change of data representation, a
property that yields ``free'' theorems about programs
just from their types. In physics, Emmy Noether showed
that if the action of a physical system is invariant
under change of coordinates, then the physical system
has a conserved quantity: a quantity that remains
constant for all time. Knowledge of conserved
quantities can reveal deep properties of physical
systems. For example, the conservation of energy is by
Noether's theorem a consequence of a system's
invariance under time-shifting. In this paper, we link
Reynolds' relational parametricity with Noether's
theorem for deriving conserved quantities. We propose
an extension of System F$ \omega $ with new kinds,
types and term constants for writing programs that
describe classical mechanical systems in terms of their
Lagrangians. We show, by constructing a relationally
parametric model of our extension of F$ \omega $, that
relational parametricity is enough to satisfy the
hypotheses of Noether's theorem, and so to derive
conserved quantities for free, directly from the
polymorphic types of Lagrangians expressed in our
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Atkey:2014:RPM,
author = "Robert Atkey and Neil Ghani and Patricia Johann",
title = "A relationally parametric model of dependent type
theory",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "503--515",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535852",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Reynolds' theory of relational parametricity captures
the invariance of polymorphically typed programs under
change of data representation. Reynolds' original work
exploited the typing discipline of the polymorphically
typed lambda-calculus System F, but there is now
considerable interest in extending relational
parametricity to type systems that are richer and more
expressive than that of System F. This paper constructs
parametric models of predicative and impredicative
dependent type theory. The significance of our models
is twofold. Firstly, in the impredicative variant we
are able to deduce the existence of initial algebras
for all indexed functors. To our knowledge, ours is
the first account of parametricity for dependent types
that is able to lift the useful deduction of the
existence of initial algebras in parametric models of
System F to the dependently typed setting. Secondly,
our models offer conceptual clarity by uniformly
expressing relational parametricity for dependent types
in terms of reflexive graphs, which allows us to unify
the interpretations of types and kinds, instead of
taking the relational interpretation of types as a
primitive notion. Expressing our model in terms of
reflexive graphs ensures that it has canonical choices
for the interpretations of the standard type
constructors of dependent type theory, except for the
interpretation of the universe of small types, where we
formulate a refined interpretation tailored for
relational parametricity. Moreover, our reflexive graph
model opens the door to generalisations of relational
parametricity, for example to higher-dimensional
relational parametricity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Murawski:2014:GSI,
author = "Andrzej S. Murawski and Nikos Tzevelekos",
title = "Game semantics for interface middleweight {Java}",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "517--528",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535880",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We consider an object calculus in which open terms
interact with the environment through interfaces. The
calculus is intended to capture the essence of
contextual interactions of Middleweight Java code.
Using game semantics, we provide fully abstract models
for the induced notions of contextual approximation and
equivalence. These are the first denotational models of
this kind.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Jeannet:2014:AAG,
author = "Bertrand Jeannet and Peter Schrammel and Sriram
Sankaranarayanan",
title = "Abstract acceleration of general linear loops",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "529--540",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535843",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We present abstract acceleration techniques for
computing loop invariants for numerical programs with
linear assignments and conditionals. Whereas abstract
interpretation techniques typically over-approximate
the set of reachable states iteratively, abstract
acceleration captures the effect of the loop with a
single, non-iterative transfer function applied to the
initial states at the loop head. In contrast to
previous acceleration techniques, our approach applies
to any linear loop without restrictions. Its novelty
lies in the use of the Jordan normal form decomposition
of the loop body to derive symbolic expressions for the
entries of the matrix modeling the effect of $ n
\geq 0 $ iterations of the loop. The entries of
such a matrix depend on $n$ through complex
polynomial, exponential and trigonometric functions.
Therefore, we introduce an abstract domain for
matrices that captures the linear inequality relations
between these complex expressions. This results in an
abstract matrix for describing the fixpoint semantics
of the loop. Our approach integrates smoothly into
standard abstract interpreters and can handle programs
with nested loops and loops containing conditional
branches. We evaluate it over small but complex loops
that are commonly found in control software, comparing
it with other tools for computing linear loop
invariants. The loops in our benchmarks typically
exhibit polynomial, exponential and oscillatory
behaviors that present challenges to existing
approaches. Our approach finds non-trivial invariants
to prove useful bounds on the values of variables for
such loops, clearly outperforming the existing
approaches in terms of precision while exhibiting good
performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{DAntoni:2014:MSA,
author = "Loris D'Antoni and Margus Veanes",
title = "Minimization of symbolic automata",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "541--553",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535849",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Symbolic Automata extend classical automata by using
symbolic alphabets instead of finite ones. Most of the
classical automata algorithms rely on the alphabet
being finite, and generalizing them to the symbolic
setting is not a trivial task. In this paper we study
the problem of minimizing symbolic automata. We
formally define and prove the basic properties of
minimality in the symbolic setting, and lift classical
minimization algorithms (Huffman-Moore's and Hopcroft's
algorithms) to symbolic automata. While Hopcroft's
algorithm is the fastest known algorithm for DFA
minimization, we show how, in the presence of symbolic
alphabets, it can incur an exponential blowup. To
address this issue, we introduce a new algorithm that
fully benefits from the symbolic representation of the
alphabet and does not suffer from the exponential
blowup. We provide comprehensive performance evaluation
of all the algorithms over large benchmarks and against
existing state-of-the-art implementations. The
experiments show how the new symbolic algorithm is
faster than previous implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chaudhuri:2014:CAD,
author = "Swarat Chaudhuri and Azadeh Farzan and Zachary
Kincaid",
title = "Consistency analysis of decision-making programs",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "555--567",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535858",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Applications in many areas of computing make discrete
decisions under uncertainty, for reasons such as
limited numerical precision in calculations and errors
in sensor-derived inputs. As a result, individual
decisions made by such programs may be
nondeterministic, and lead to contradictory decisions
at different points of an execution. This means that an
otherwise correct program may execute along paths that
it would not follow under its ideal semantics,
violating essential program invariants on the way. A
program is said to be consistent if it does not suffer
from this problem despite uncertainty in decisions. In
this paper, we present a sound, automatic program
analysis for verifying that a program is consistent in
this sense. Our analysis proves that each decision made
along a program execution is consistent with the
decisions made earlier in the execution. The proof is
done by generating an invariant that abstracts the set
of all decisions made along executions that end at a
program location l, then verifying, using a fixpoint
constraint-solver, that no contradiction can be derived
when these decisions are combined with new decisions
made at l. We evaluate our analysis on a collection of
programs implementing algorithms in computational
geometry. Consistency is known to be a critical,
frequently-violated, and thoroughly studied correctness
property in geometry, but ours is the first attempt at
automated verification of consistency of geometric
algorithms. Our benchmark suite consists of
implementations of convex hull computation,
triangulation, and point location algorithms. On almost
all examples that are not consistent (with two
exceptions), our analysis is able to verify consistency
within a few minutes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhang:2014:TGD,
author = "Danfeng Zhang and Andrew C. Myers",
title = "Toward general diagnosis of static errors",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "569--581",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535870",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We introduce a general way to locate programmer
mistakes that are detected by static analyses such as
type checking. The program analysis is expressed in a
constraint language in which mistakes result in
unsatisfiable constraints. Given an unsatisfiable
system of constraints, both satisfiable and
unsatisfiable constraints are analyzed, to identify the
program expressions most likely to be the cause of
unsatisfiability. The likelihood of different error
explanations is evaluated under the assumption that the
programmer's code is mostly correct, so the simplest
explanations are chosen, following Bayesian principles.
For analyses that rely on programmer-stated
assumptions, the diagnosis also identifies assumptions
likely to have been omitted. The new error diagnosis
approach has been implemented for two very different
program analyses: type inference in OCaml and
information flow checking in Jif. The effectiveness of
the approach is evaluated using previously collected
programs containing errors. The results show that when
compared to existing compilers and other tools, the
general technique identifies the location of programmer
errors significantly more accurately.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chen:2014:CFT,
author = "Sheng Chen and Martin Erwig",
title = "Counter-factual typing for debugging type errors",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "583--594",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535863",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Changing a program in response to a type error plays
an important part in modern software development.
However, the generation of good type error messages
remains a problem for highly expressive type systems.
Existing approaches often suffer from a lack of
precision in locating errors and proposing remedies.
Specifically, they either fail to locate the source of
the type error consistently, or they report too many
potential error locations. Moreover, the change
suggestions offered are often incorrect. This makes the
debugging process tedious and ineffective. We present
an approach to the problem of type debugging that is
based on generating and filtering a comprehensive set
of type-change suggestions. Specifically, we generate
all (program-structure-preserving) type changes that
can possibly fix the type error. These suggestions will
be ranked and presented to the programmer in an
iterative fashion. In some cases we also produce
suggestions to change the program. In most situations,
this strategy delivers the correct change suggestions
quickly, and at the same time never misses any rare
suggestions. The computation of the potentially huge
set of type-change suggestions is efficient since it is
based on a variational type inference algorithm that
type checks a program with variations only once,
efficiently reusing type information for shared parts.
We have evaluated our method and compared it with
previous approaches. Based on a large set of examples
drawn from the literature, we have found that our
method outperforms other approaches and provides a
viable alternative.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Boker:2014:BTS,
author = "Udi Boker and Thomas A. Henzinger and Arjun
Radhakrishna",
title = "Battery transition systems",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "595--606",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535875",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "The analysis of the energy consumption of software is
an important goal for quantitative formal methods.
Current methods, using weighted transition systems or
energy games, model the energy source as an ideal
resource whose status is characterized by one number,
namely the amount of remaining energy. Real batteries,
however, exhibit behaviors that can deviate
substantially from an ideal energy resource. Based on a
discretization of a standard continuous battery model,
we introduce {\em battery transition systems}. In this
model, a battery is viewed as consisting of two parts
--- the available-charge tank and the bound-charge
tank. Any charge or discharge is applied to the
available-charge tank. Over time, the energy from each
tank diffuses to the other tank. Battery transition
systems are infinite state systems that, being not
well-structured, fall into no decidable class that is
known to us. Nonetheless, we are able to prove that the
$ \omega $-regular model-checking problem is decidable
for battery transition systems. We also present a case
study on the verification of control programs for
energy-constrained semi-autonomous robots.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Li:2014:SOS,
author = "Yi Li and Aws Albarghouthi and Zachary Kincaid and
Arie Gurfinkel and Marsha Chechik",
title = "Symbolic optimization with {SMT} solvers",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "607--618",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535857",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "The rise in efficiency of Satisfiability Modulo
Theories (SMT) solvers has created numerous uses for
them in software verification, program synthesis,
functional programming, refinement types, etc. In all
of these applications, SMT solvers are used for
generating satisfying assignments (e.g., a witness for
a bug) or proving unsatisfiability/validity (e.g.,
proving that a subtyping relation holds). We are often
interested in finding not just an arbitrary satisfying
assignment, but one that optimizes
(minimizes/maximizes) certain criteria. For example, we
might be interested in detecting program executions
that maximize energy usage (performance bugs), or
synthesizing short programs that do not make expensive
API calls. Unfortunately, none of the available SMT
solvers offer such optimization capabilities. In this
paper, we present SYMBA, an efficient SMT-based
optimization algorithm for objective functions in the
theory of linear real arithmetic (LRA). Given a formula
$ \phi $ and an objective function $t$, SYMBA finds a
satisfying assignment of $ \phi $ that maximizes the value
of $t$. SYMBA utilizes efficient SMT solvers as black
boxes. As a result, it is easy to implement and it
directly benefits from future advances in SMT solvers.
Moreover, SYMBA can optimize a set of objective
functions, reusing information between them to speed up
the analysis. We have implemented SYMBA and evaluated
it on a large number of optimization benchmarks drawn
from program analysis tasks. Our results indicate the
power and efficiency of SYMBA in comparison with
competing approaches, and highlight the importance of
its multi-objective-function feature.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Benton:2014:AEP,
author = "Nick Benton and Martin Hofmann and Vivek Nigam",
title = "Abstract effects and proof-relevant logical
relations",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "619--631",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535869",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We give a denotational semantics for a region-based
effect system that supports type abstraction in the
sense that only externally visible effects need to be
tracked: non-observable internal modifications, such as
the reorganisation of a search tree or lazy
initialisation, can count as `pure' or `read only'.
This `fictional purity' allows clients of a module to
validate soundly more effect-based program equivalences
than would be possible with previous semantics. Our
semantics uses a novel variant of logical relations
that maps types not merely to partial equivalence
relations on values, as is commonly done, but rather to
a proof-relevant generalisation thereof, namely
setoids. The objects of a setoid establish that values
inhabit semantic types, whilst its morphisms are
understood as proofs of semantic equivalence. The
transition to proof-relevance solves two awkward
problems caused by na{\"\i}ve use of existential
quantification in Kripke logical relations, namely
failure of admissibility and spurious functional
dependencies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Katsumata:2014:PEM,
author = "Shin-ya Katsumata",
title = "Parametric effect monads and semantics of effect
systems",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "633--645",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535846",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "We study fundamental properties of a generalisation of
monad called parametric effect monad, and apply it to
the interpretation of general effect systems whose
effects have sequential composition operators. We show
that parametric effect monads admit analogues of the
structures and concepts that exist for monads, such as
Kleisli triples, the state monad and the continuation
monad, Plotkin and Power's algebraic operations, and
the categorical $ \top \top $-lifting. We also show a systematic
method to generate both effects and a parametric effect
monad from a monad morphism. Finally, we introduce two
effect systems with explicit and implicit subeffecting,
and discuss their denotational semantics and the
soundness of effect systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Pagani:2014:AQS,
author = "Michele Pagani and Peter Selinger and Beno{\^\i}t
Valiron",
title = "Applying quantitative semantics to higher-order
quantum computing",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "647--658",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535879",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Finding a denotational semantics for higher order
quantum computation is a long-standing problem in the
semantics of quantum programming languages. Most past
approaches to this problem fell short in one way or
another, either limiting the language to an unusably
small finitary fragment, or giving up important
features of quantum physics such as entanglement. In
this paper, we propose a denotational semantics for a
quantum lambda calculus with recursion and an infinite
data type, using constructions from quantitative
semantics of linear logic.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Accattoli:2014:NST,
author = "Beniamino Accattoli and Eduardo Bonelli and Delia
Kesner and Carlos Lombardi",
title = "A nonstandard standardization theorem",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "659--670",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535886",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Standardization is a fundamental notion for connecting
programming languages and rewriting calculi. Since both
programming languages and calculi rely on substitution
for defining their dynamics, explicit substitutions
(ES) help further close the gap between theory and
practice. This paper focuses on standardization for the
linear substitution calculus, a calculus with ES
capable of mimicking reduction in lambda-calculus and
linear logic proof-nets. For the latter, proof-nets can
be formalized by means of a simple equational theory
over the linear substitution calculus. Contrary to
other extant calculi with ES, our system can be
equipped with a residual theory in the sense of
L{\'e}vy, which is used to prove a left-to-right
standardization theorem for the calculus with ES but
without the equational theory. Such a theorem, however,
does not lift from the calculus with ES to proof-nets,
because the notion of left-to-right derivation is not
preserved by the equational theory. We then relax the
notion of left-to-right standard derivation, based on a
total order on redexes, to a more liberal notion of
standard derivation based on partial orders. Our proofs
rely on Gonthier, L{\'e}vy, and Melli{\`e}s' axiomatic
theory for standardization. However, we go beyond
merely applying their framework, revisiting some of its
key concepts: we obtain uniqueness (modulo) of standard
derivations in an abstract way and we provide a
coinductive characterization of their key abstract
notion of external redex. This last point is then used
to give a simple proof that linear head reduction --- a
nondeterministic strategy having a central role in the
theory of linear logic --- is standard.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Eisenberg:2014:CTF,
author = "Richard A. Eisenberg and Dimitrios Vytiniotis and
Simon Peyton Jones and Stephanie Weirich",
title = "Closed type families with overlapping equations",
journal = j-SIGPLAN,
volume = "49",
number = "1",
pages = "671--683",
month = jan,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578855.2535856",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Mar 4 17:04:57 MST 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "POPL '14 conference proceedings.",
abstract = "Open, type-level functions are a recent innovation in
Haskell that move Haskell towards the expressiveness of
dependent types, while retaining the look and feel of a
practical programming language. This paper shows how to
increase expressiveness still further, by adding closed
type functions whose equations may overlap, and may
have non-linear patterns over an open type universe.
Although practically useful and simple to implement,
these features go beyond conventional dependent type
theory in some respects, and have a subtle
metatheory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lerner:2014:TRT,
author = "Benjamin S. Lerner and Joe Gibbs Politz and Arjun Guha
and Shriram Krishnamurthi",
title = "{TeJaS}: retrofitting type systems for {JavaScript}",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "1--16",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508170",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "JavaScript programs vary widely in functionality,
complexity, and use, and analyses of these programs
must accommodate such variations. Type-based analyses
are typically the simplest such analyses, but due to
the language's subtle idioms and many
application-specific needs --- such as ensuring
general-purpose type correctness, security properties,
or proper library usage --- we have found that a single
type system does not suffice for all purposes. However,
these varied uses still share many reusable common
elements. In this paper we present TeJaS, a framework
for building type systems for JavaScript. TeJaS has
been engineered modularly to encourage experimentation.
Its initial type environment is reified, to admit easy
modeling of the various execution contexts of
JavaScript programs, and its type language and typing
rules are extensible, to enable variations of the type
system to be constructed easily. The paper presents the
base TeJaS type system, which performs traditional
type-checking for JavaScript. Because JavaScript
demands complex types, we explain several design
decisions to improve user ergonomics. We then describe
TeJaS's modular structure, and illustrate it by
reconstructing the essence of a very different type
system for JavaScript. Systems built from TeJaS have
been applied to several real-world, third-party
JavaScript programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Kashyap:2014:TRS,
author = "Vineeth Kashyap and John Sarracino and John Wagner and
Ben Wiedermann and Ben Hardekopf",
title = "Type refinement for static analysis of {JavaScript}",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "17--26",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508175",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "Static analysis of JavaScript has proven useful for a
variety of purposes, including optimization, error
checking, security auditing, program refactoring, and
more. We propose a technique called type refinement
that can improve the precision of such static analyses
for JavaScript without any discernible performance
impact. Refinement is a known technique that uses the
conditions in branch guards to refine the analysis
information propagated along each branch path. The key
insight of this paper is to recognize that JavaScript
semantics include many implicit conditional checks on
types, and that performing type refinement on these
implicit checks provides significant benefit for
analysis precision. To demonstrate the effectiveness of
type refinement, we implement a static analysis tool
for reporting potential type-errors in JavaScript
programs. We provide an extensive empirical evaluation
of type refinement using a benchmark suite containing a
variety of JavaScript application domains, ranging from
the standard performance benchmark suites (Sunspider
and Octane), to open-source JavaScript applications, to
machine-generated JavaScript via Emscripten. We show
that type refinement can significantly improve analysis
precision by up to 86\% without affecting the
performance of the analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Allende:2014:CIS,
author = "Esteban Allende and Johan Fabry and {\'E}ric Tanter",
title = "Cast insertion strategies for gradually-typed
objects",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "27--36",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508171",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "Gradual typing enables a smooth and progressive
integration of static and dynamic typing. The semantics
of a gradually-typed program is given by translation to
an intermediate language with casts: runtime type
checks that control the boundaries between statically-
and dynamically-typed portions of a program. This paper
studies the performance of different cast insertion
strategies in the context of Gradualtalk, a
gradually-typed Smalltalk. We first implement the
strategy specified by Siek and Taha, which inserts
casts at call sites. We then study the dual approach,
which consists in performing casts in callees. Based on
the observation that both strategies perform well in
different scenarios, we design a hybrid strategy that
combines the best of each approach. We evaluate these
three strategies using both micro- and
macro-benchmarks. We also discuss the impact of these
strategies on memory, modularity, and inheritance. The
hybrid strategy constitutes a promising cast insertion
strategy for adding gradual types to existing
dynamically-typed languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Kedlaya:2014:ITS,
author = "Madhukar N. Kedlaya and Jared Roesch and Behnam
Robatmili and Mehrdad Reshadi and Ben Hardekopf",
title = "Improved type specialization for dynamic scripting
languages",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "37--48",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508177",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "Type feedback and type inference are two common
methods used to optimize dynamic languages such as
JavaScript. Each of these methods has its own strengths
and weaknesses, and we propose that each can benefit
from the other if combined in the right way. We explore
the interdependency between these two methods and
propose two novel ways to combine them in order to
significantly increase their aggregate benefit and
decrease their aggregate overhead. In our proposed
strategy, an initial type inference pass is applied
that can reduce type feedback overhead by enabling more
intelligent placement of profiling hooks. This initial
type inference pass is novel in the literature. After
profiling, a final type inference pass uses the type
information from profiling to generate efficient code.
While this second pass is not novel, we significantly
improve its effectiveness in a novel way by feeding the
type inference pass information about the function
signature, i.e., the types of the function's arguments
for a specific function invocation. Our results show
significant speedups when using these low-overhead
strategies, ranging from $ 1.2 \times $ to $ 4 \times $
over an implementation that does not perform type
feedback or type inference based optimizations. Our
experiments are carried out across a wide range of
traditional benchmarks and realistic web applications.
The results also show an average reduction of 23.5\% in
the size of the profiled data for these benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Keil:2014:EDA,
author = "Matthias Keil and Peter Thiemann",
title = "Efficient dynamic access analysis using {JavaScript}
proxies",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "49--60",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508176",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "JSConTest introduced the notions of effect monitoring
and dynamic effect inference for JavaScript. It enables
the description of effects with path specifications
resembling regular expressions. It is implemented by an
offline source code transformation. To overcome the
limitations of the JSConTest implementation, we
redesigned and reimplemented effect monitoring by
taking advantage of JavaScript proxies. Our new design
avoids all drawbacks of the prior implementation. It
guarantees full interposition; it is not restricted to
a subset of JavaScript; it is self-maintaining; and its
scalability to large programs is significantly better
than with JSConTest. The improved scalability has two
sources. First, the reimplementation is significantly
faster than the original, transformation-based
implementation. Second, the reimplementation relies on
the fly-weight pattern and on trace reduction to
conserve memory. Only the combination of these
techniques enables monitoring and inference for large
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Weiher:2014:PIU,
author = "Marcel Weiher and Robert Hirschfeld",
title = "Polymorphic identifiers: uniform resource access in
{Objective-Smalltalk}",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "61--72",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508169",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "In object-oriented programming, polymorphic dispatch
of operations decouples clients from specific providers
of services and allows implementations to be modified
or substituted without affecting clients. The Uniform
Access Principle (UAP) tries to extend these qualities
to resource access by demanding that access to state be
indistinguishable from access to operations. Despite
language features supporting the UAP, the overall goal
of substitutability has not been achieved for either
alternative resources such as keyed storage, files or
web pages, or for alternate access mechanisms: specific
kinds of resources are bound to specific access
mechanisms and vice versa. Changing storage or access
patterns either requires changes to both clients and
service providers and trying to maintain the UAP
imposes significant penalties in terms of
code-duplication and/or performance overhead. We
propose introducing first class identifiers as
polymorphic names for storage locations to solve these
problems. With these Polymorphic Identifiers, we show
that we can provide uniform access to a wide variety of
resource types as well as storage and access
mechanisms, whether parametrized or direct, without
affecting client code, without causing code duplication
or significant performance penalties.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Park:2014:AAS,
author = "Changhee Park and Hongki Lee and Sukyoung Ryu",
title = "All about the with statement in {JavaScript}: removing
with statements in {JavaScript} applications",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "73--84",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508173",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "The with statement in JavaScript makes static analysis
of JavaScript applications difficult by introducing a
new scope at run time and thus invalidating lexical
scoping. Therefore, many static approaches to
JavaScript program analysis and the strict mode of
ECMAScript 5 simply disallow the with statement. To
justify exclusion of the with statement, we should
better understand the actual usage patterns of the with
statement. In this paper, we present the usage patterns
of the with statement in real-world JavaScript
applications currently used in the 898 most popular web
sites. We investigate whether we can rewrite the with
statements in each pattern to other statements not
using the with statement. We show that we can rewrite
all the static occurrences of the with statement that
do not have any dynamic code generating functions. Even
though the rewriting process is not applicable to any
dynamically generated with statements, our results are
still promising. Because all the static approaches that
disallow the with statement also disallow dynamic code
generation, such static approaches can allow the with
statement using our rewriting process. We formally
present our rewriting strategy, provide its
implementation, and show its faithfulness using
extensive testing. We believe that removing with
statements will simplify JavaScript program analysis
designs without considering dynamic scope introduction
while imposing fewer syntactic restrictions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Lameed:2014:OMF,
author = "Nurudeen A. Lameed and Laurie J. Hendren",
title = "Optimizing {MATLAB} {\tt feval} with dynamic
techniques",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "85--96",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508174",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/matlab.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "MATLAB is a popular dynamic array-based language used
by engineers, scientists and students worldwide. The
built-in function feval is an important MATLAB feature
for certain classes of numerical programs and solvers
which benefit from having functions as parameters.
Programmers may pass a function name or function handle
to the solver and then the solver uses feval to
indirectly call the function. In this paper, we show
that there are significant performance overheads for
function calls via feval, in both MATLAB interpreters
and JITs. The paper then proposes, implements and
compares two on-the-fly mechanisms for specialization
of feval calls. The first approach uses on-stack
replacement technology, as supported by McVM/McOSR. The
second approach specializes calls of functions with
feval using a combination of runtime input argument
types and values. Experimental results on seven
numerical solvers show that the techniques provide good
performance improvements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Yoo:2014:WRR,
author = "Danny Yoo and Shriram Krishnamurthi",
title = "{Whalesong}: running {Racket} in the browser",
journal = j-SIGPLAN,
volume = "49",
number = "2",
pages = "97--108",
month = feb,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2578856.2508172",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 06:09:05 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "DLS '13 conference proceedings.",
abstract = "JavaScript is the language of the ubiquitous Web, but
it only poorly supports event-driven functional
programs due to its single-threaded, asynchronous
nature and lack of rich control flow operators. We
present Whalesong, a compiler from Racket that
generates JavaScript code that masks these problems. We
discuss the implementation strategy using delimited
continuations, an interface to the DOM, and an FFI for
adapting JavaScript libraries to add new
platform-dependent reactive features. In the process,
we also describe extensions to Racket's functional
event-driven programming model. We also briefly discuss
the implementation details.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '13 conference proceedings.",
}
@Article{Bodik:2014:MBS,
author = "Rastislav Bodik",
title = "Modeling biology with solver-aided programming
languages",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "1--2",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517229",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A good model of a biological cell exposes secrets of
the cell's signaling mechanisms, explaining diseases
and facilitating drug discovery. Modeling cells is
fundamentally a programming problem --- it's
programming because the model is a concurrent program
that simulates the cell, and it's a problem because it
is hard to write a program that reproduces all
experimental observations of the cell faithfully. In
this talk, I will introduce solver-aided programming
languages and show how they ease modeling biology as
well as make programming accessible to non-programmers.
Solver-aided languages come with constructs that
delegate part of the programming problem to a
constraint solver, which can be guided to synthesize
parts of the program, localize its bugs, or act as a
clairvoyant oracle. I will describe our work on
synthesis of stem cell models in {\em C. elegans} and then
show how our framework called Rosette can rapidly
implement a solver-aided language in several domains,
from programming by demonstration to spatial parallel
programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Erdweg:2014:FEL,
author = "Sebastian Erdweg and Felix Rieger",
title = "A framework for extensible languages",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "3--12",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517210",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Extensible programming languages such as SugarJ or
Racket enable programmers to introduce customary
language features as extensions of the base language.
Traditionally, systems that support language extensions
are either (i) agnostic to the base language or (ii)
only support a single base language. In this paper, we
present a framework for language extensibility that
turns a non-extensible language into an extensible
language featuring library-based extensible syntax,
extensible static analyses, and extensible editor
support. To make a language extensible, our framework
only requires knowledge of the base language's grammar,
the syntax for import statements (which activate
extensions), and how to compile base-language programs.
We have evaluated the generality of our framework by
instantiating it for Java, Haskell, Prolog, JavaScript,
and System F$_{ \omega }$, and by studying existing
module-system features and their support in our
framework.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Flatt:2014:SRY,
author = "Matthew Flatt",
title = "Submodules in {Racket}: you want it when, again?",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "13--22",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517211",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In an extensible programming language, programmers
write code that must run at different times --- in
particular, at compile time versus run time. The module
system of the Racket programming language enables a
programmer to reason about programs in the face of such
extensibility, because the distinction between run-time
and compile-time phases is built into the language
model. Submodules extend Racket's module system to make
the phase-separation facet of the language extensible.
That is, submodules give programmers the capability to
define new phases, such as `test time' or
`documentation time,' with the same reasoning and
code-management benefits as the built-in distinction
between run time and compile time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Dyer:2014:DVE,
author = "Robert Dyer and Hridesh Rajan and Tien N. Nguyen",
title = "Declarative visitors to ease fine-grained source code
mining with full history on billions of {AST} nodes",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "23--32",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517226",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software repositories contain a vast wealth of
information about software development. Mining these
repositories has proven useful for detecting patterns
in software development, testing hypotheses for new
software engineering approaches, etc. Specifically,
mining source code has yielded significant insights
into software development artifacts and processes.
Unfortunately, mining source code at a large-scale
remains a difficult task. Previous approaches had to
either limit the scope of the projects studied, limit
the scope of the mining task to be more coarse-grained,
or sacrifice studying the history of the code due to
both human and computational scalability issues. In
this paper we address the substantial challenges of
mining source code: (a) at a very large scale; (b) at a
fine-grained level of detail; and (c) with full history
information. To address these challenges, we present
domain-specific language features for source code
mining. Our language features are inspired by
object-oriented visitors and provide a default
depth-first traversal strategy along with two
expressions for defining custom traversals. We provide
an implementation of these features in the Boa
infrastructure for software repository mining and
describe a code generation strategy into Java code. To
show the usability of our domain-specific language
features, we reproduced over 40 source code mining
tasks from two large-scale previous studies in just 2
person-weeks. The resulting code for these tasks show
between $ 2.0 \times $--$ 4.8 \times $ reduction in
code size. Finally we perform a small controlled
experiment to gain insights into how easily mining
tasks written using our language features can be
understood, with no prior training. We show a
substantial number of tasks (77\%) were understood by
study participants, in about 3 minutes per task.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Solodkyy:2014:OPM,
author = "Yuriy Solodkyy and Gabriel {Dos Reis} and Bjarne
Stroustrup",
title = "Open pattern matching for {C++}",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "33--42",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517222",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Pattern matching is an abstraction mechanism that can
greatly simplify source code. We present
functional-style pattern matching for C++ implemented
as a library called Mach7. All the patterns are
user-definable, can be stored in variables, passed
among functions, and allow the use of class
hierarchies. As an example, we implement common
patterns used in functional languages. Our approach to
pattern matching is based on compile-time composition
of pattern objects through concepts. This is superior
(in terms of performance and expressiveness) to
approaches based on run-time composition of polymorphic
pattern objects. In particular, our solution allows
mapping functional code based on pattern matching
directly into C++ and produces code that is only a few
percent slower than hand-optimized C++ code. The
library uses an efficient type switch construct,
further extending it to multiple scrutinees and general
patterns. We compare the performance of pattern
matching to that of double dispatch and open
multi-methods in C++.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Martin:2014:TCR,
author = "Marko Martin and Mira Mezini and Sebastian Erdweg",
title = "Template constructors for reusable object
initialization",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "43--52",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517212",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reuse of and abstraction over object initialization
logic is not properly supported in mainstream
object-oriented languages. This may result in
significant amount of boilerplate code and
proliferation of constructors in subclasses. It also
makes it impossible for mixins to extend the
initialization interface of classes they are applied
to. We propose template constructors, which employ
template parameters and pattern matching of them
against signatures of superclass constructors to enable
a one-to-many binding of super-calls. We demonstrate
how template constructors solve the aforementioned
problems. We present a formalization of the concept, a
Java-based implementation, and use cases which exercise
its strengths.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Richard-Foy:2014:EHL,
author = "Julien Richard-Foy and Olivier Barais and Jean-Marc
J{\'e}z{\'e}quel",
title = "Efficient high-level abstractions for {Web}
programming",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "53--60",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517227",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Writing large Web applications is known to be
difficult. One challenge comes from the fact that the
application's logic is scattered into heterogeneous
clients and servers, making it difficult to share code
between both sides or to move code from one side to the
other. Another challenge is performance: while Web
applications rely on ever more code on the client-side,
they may run on smart phones with limited hardware
capabilities. These two challenges raise the following
problem: how to benefit from high-level languages and
libraries making code complexity easier to manage and
abstracting over the clients and servers differences
without trading this ease of engineering for
performance? This article presents high-level
abstractions defined as deep embedded DSLs in Scala
that can generate efficient code leveraging the
characteristics of both client and server environments.
We compare performance on client-side against other
candidate technologies and against hand written
low-level JavaScript code. Though code written with our
DSL has a high level of abstraction, our benchmark on a
real world application reports that it runs as fast as
hand tuned low-level JavaScript code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Gerakios:2014:RTP,
author = "Prodromos Gerakios and Aggelos Biboudis and Yannis
Smaragdakis",
title = "Reified type parameters using {Java} annotations",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "61--64",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517223",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Java generics are compiled by-erasure: all clients
reuse the same bytecode, with uses of the unknown type
erased. C++ templates are compiled by-expansion: each
type-instantiation of a template produces a different
code definition. The two approaches offer trade-offs on
multiple axes. We propose an extension of Java generics
that allows by-expansion translation relative to
selected type parameters only. This language design
allows sophisticated users to get the best of both
worlds at a fine granularity. Furthermore, our proposal
is based on Java 8 Type Annotations (JSR 308) and the
Checker Framework as an abstraction layer for
controlling compilation without changes to the
internals of a Java compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Schulze:2014:DDP,
author = "Sandro Schulze and J{\"o}rg Liebig and Janet Siegmund
and Sven Apel",
title = "Does the discipline of preprocessor annotations
matter?: a controlled experiment",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "65--74",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517215",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The C preprocessor ( CPP ) is a simple and
language-independent tool, widely used to implement
variable software systems using conditional compilation
(i.e., by including or excluding annotated code).
Although CPP provides powerful means to express
variability, it has been criticized for allowing
arbitrary annotations that break the underlying
structure of the source code. We distinguish between
disciplined annotations, which align with the structure
of the source code, and undisciplined annotations,
which do not. Several studies suggest that especially
the latter type of annotations makes it hard to
(automatically) analyze the code. However, little is
known about whether the type of annotations has an
effect on program comprehension. We address this issue
by means of a controlled experiment with human
subjects. We designed similar tasks for both
disciplined and undisciplined annotations to measure
program comprehension. Then, we measured the
performance of the subjects regarding correctness and
response time for solving the tasks. Our results
suggest that there are no differences between
disciplined and undisciplined annotations from a
program-comprehension perspective. Nevertheless, we
observed that finding and correcting errors is a
time-consuming and tedious task in the presence of
preprocessor annotations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Medeiros:2014:IPB,
author = "Fl{\'a}vio Medeiros and M{\'a}rcio Ribeiro and Rohit
Gheyi",
title = "Investigating preprocessor-based syntax errors",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "75--84",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517221",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The C preprocessor is commonly used to implement
variability in program families. Despite its widespread
usage, some studies indicate that the C preprocessor
makes variability implementation difficult and
error-prone. However, we still lack studies to
investigate preprocessor-based syntax errors and
quantify to what extent they occur in practice. In this
paper, we define a technique based on a
variability-aware parser to find syntax errors in
releases and commits of program families. To
investigate these errors, we perform an empirical study
where we use our technique in 41 program family
releases, and more than 51 thousand commits of 8
program families. We find 7 and 20 syntax errors in
releases and commits of program families, respectively.
They are related not only to incomplete annotations,
but also to complete ones. We submit 8 patches to fix
errors that developers have not fixed yet, and they
accept 75\% of them. Our results reveal that the time
developers need to fix the errors varies from days to
years in family repositories. We detect errors even in
releases of well-known and widely used program
families, such as Bash, CVS and Vim. We also classify
the syntax errors into 6 different categories. This
classification may guide developers to avoid them
during development.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Kramer:2014:UDO,
author = "Dean Kramer and Samia Oussena and Peter Komisarczuk
and Tony Clark",
title = "Using document-oriented {GUIs} in dynamic software
product lines",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "85--94",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517214",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic Software Product Line (DSPL) Engineering has
gained interest through its promise of unifying
software adaptation, whereby adaptation can be realised
at both compile time and runtime. While
previous work has enabled program logic adaptation by
the use of language extensions and platform support,
little attention has been placed on Graphical User
Interface (GUI) variability. Different design patterns
including the Model View Controller are commonly used
in GUI implementation, with GUI documents being used
for declaring the GUI. Currently, to handle dynamic GUI
variability, the developer needs to implement
GUI refinements using multiple techniques. This paper
proposes a solution for dealing with GUI document
variability, statically and dynamically, in a unified
way. In our approach, we currently use a compile time
method for producing GUI variants, and code
transformations to handle these variants within the
application at runtime. To avoid GUI duplicates, only
GUI variants that are unique, and related to a valid
product configuration, are produced. To validate our
approach, we implemented tool support to enable this
for Android based applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Siegmund:2014:FBP,
author = "Norbert Siegmund and Alexander von Rhein and Sven
Apel",
title = "Family-based performance measurement",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "95--104",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517209",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most contemporary programs are customizable. They
provide many features that give rise to millions of
program variants. Determining which feature selection
yields an optimal performance is challenging, because
of the exponential number of variants. Predicting the
performance of a variant based on previous measurements
proved successful, but induces a trade-off between the
measurement effort and prediction accuracy. We propose
the alternative approach of family-based performance
measurement, to reduce the number of measurements
required for identifying feature interactions and for
obtaining accurate predictions. The key idea is to
create a variant simulator (by translating compile-time
variability to run-time variability) that can simulate
the behavior of all program variants. We use it to
measure performance of individual methods, trace
methods to features, and infer feature interactions
based on the call graph. We evaluate our approach by
means of five feature-oriented programs. On average, we
achieve an accuracy of 98\%, with only a single
measurement per customizable program. Observations show
that our approach opens avenues of future research in
different domains, such as feature-interaction
detection and testing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Marek:2014:SRC,
author = "Luk{\'a}s Marek and Stephen Kell and Yudi Zheng and
Lubom{\'\i}r Bulej and Walter Binder and Petr Tuma and
Danilo Ansaloni and Aibek Sarimbekov and Andreas Sewe",
title = "{ShadowVM}: robust and comprehensive dynamic program
analysis for the {Java} platform",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "105--114",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517219",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic analysis tools are often implemented using
instrumentation, particularly on managed runtimes
including the Java Virtual Machine (JVM). Performing
instrumentation robustly is especially complex on such
runtimes: existing frameworks offer limited coverage
and poor isolation, while previous work has shown that
apparently innocuous instrumentation can cause
deadlocks or crashes in the observed application. This
paper describes ShadowVM, a system for
instrumentation-based dynamic analyses on the JVM which
combines a number of techniques to greatly improve both
isolation and coverage. These centre on offloading the
analysis to a separate process; we believe our design
is the first system to enable genuinely full bytecode
coverage on the JVM. We describe a working
implementation, and use a case study to demonstrate its
improved coverage and to evaluate its runtime
overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Kolesnikov:2014:CPB,
author = "Sergiy Kolesnikov and Alexander von Rhein and Claus
Hunsen and Sven Apel",
title = "A comparison of product-based, feature-based, and
family-based type checking",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "115--124",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517213",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Analyzing software product lines is difficult, due to
their inherent variability. In the past, several
strategies for product-line analysis have been
proposed, in particular, product-based, feature-based,
and family-based strategies. Despite recent attempts to
conceptually and empirically compare different
strategies, there is no work that empirically compares
all of the three strategies in a controlled setting. We
close this gap by extending a compiler for
feature-oriented programming with support for
product-based, feature-based, and family-based type
checking. We present and discuss the results of a
comparative performance evaluation that we conducted on
a set of 12 feature-oriented, Java-based product lines.
Most notably, we found that the family-based strategy
is superior for all subject product lines: it is
substantially faster, it detects all kinds of errors,
and it provides the most detailed information about
them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Ofenbeck:2014:SST,
author = "Georg Ofenbeck and Tiark Rompf and Alen Stojanov and
Martin Odersky and Markus P{\"u}schel",
title = "{Spiral} in {Scala}: towards the systematic
construction of generators for performance libraries",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "125--134",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517228",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Program generators for high performance libraries are
an appealing solution to the recurring problem of
porting and optimizing code with every new processor
generation, but only a few such generators exist to
date. This is due not only to the difficulty of the
design, but also to that of the actual implementation,
which often
results in an ad-hoc collection of standalone programs
and scripts that are hard to extend, maintain, or
reuse. In this paper we ask whether and which
programming language concepts and features are needed
to enable a more systematic construction of such
generators. The systematic approach we advocate
extrapolates from existing generators: (a) describing
the problem and algorithmic knowledge using one, or
several, domain-specific languages (DSLs), (b)
expressing optimizations and choices as rewrite rules
on DSL programs, (c) designing data structures that can
be configured to control the type of code that is
generated and the data representation used, and (d)
using autotuning to select the best-performing
alternative. As a case study, we implement a small, but
representative subset of Spiral in Scala using the
Lightweight Modular Staging (LMS) framework. The first
main contribution of this paper is the realization of
(c) using type classes to abstract over staging
decisions, i.e. which pieces of a computation are
performed immediately and for which pieces code is
generated. Specifically, we abstract over different
complex data representations jointly with different
code representations including generating loops versus
unrolled code with scalar replacement --- a crucial and
usually tedious performance transformation. The second
main contribution is to provide full support for (a)
and (d) within the LMS framework: we extend LMS to
support translation between different DSLs and
autotuning through search.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Chapin:2014:SNT,
author = "Peter Chapin and Christian Skalka and Scott Smith and
Michael Watson",
title = "{Scalaness\slash nesT}: type specialized staged
programming for sensor networks",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "135--144",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517217",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming wireless embedded networks is challenging
due to severe limitations on processing speed, memory,
and bandwidth. Staged programming can help bridge the
gap between high-level code refinement techniques and
efficient device-level programs by allowing a
first-stage program to specialize device-level code.
Here we introduce a two-stage programming system for
wireless sensor networks. The first-stage program is
written in
our extended dialect of Scala, called Scalaness, where
components written in our type safe dialect of nesC,
called nesT, are composed and specialized. Scalaness
programs can dynamically construct TinyOS-compliant
nesT device images that can be deployed to motes. A key
result, called cross-stage type safety, shows that
successful static type checking of a Scalaness program
means no type errors will arise either during
programmatic composition and specialization of WSN
code, or later on the WSN itself. Scalaness has been
implemented through direct modification of the Scala
compiler. Implementation of a staged public-key
cryptography calculation shows the sensor memory
footprint can be significantly reduced by staging.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Sujeeth:2014:FGH,
author = "Arvind K. Sujeeth and Austin Gibbons and Kevin J.
Brown and HyoukJoong Lee and Tiark Rompf and Martin
Odersky and Kunle Olukotun",
title = "Forge: generating a high performance {DSL}
implementation from a declarative specification",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "145--154",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517220",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Domain-specific languages provide a promising path to
automatically compile high-level code to parallel,
heterogeneous, and distributed hardware. However, in
practice high performance DSLs still require
considerable software expertise to develop and force
users into tool-chains that hinder prototyping and
debugging. To address these problems, we present Forge,
a new meta DSL for declaratively specifying high
performance embedded DSLs. Forge provides DSL authors
with high-level abstractions (e.g., data structures,
parallel patterns, effects) for specifying their DSL in
a way that permits high performance. From this
high-level specification, Forge automatically generates
both a na{\"\i}ve Scala library implementation of the
DSL and a high performance version using the Delite DSL
framework. Users of a Forge-generated DSL can prototype
their application using the library version, and then
switch to the Delite version to run on multicore CPUs,
GPUs, and clusters without changing the application
code. Forge-generated Delite DSLs perform within
$ 2 \times $ of hand-optimized C++ and up to
$ 40 \times $ better than
Spark, an alternative high-level distributed
programming environment. Compared to a manually
implemented Delite DSL, Forge provides a factor of
$3$--$ 6 \times $ reduction in lines of code and does
not sacrifice any performance. Furthermore, Forge
specifications can be generated from existing Scala
libraries, are easy to maintain, shield DSL developers
from changes in the Delite framework, and enable DSLs
to be retargeted to other frameworks transparently.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Kurilova:2014:SSL,
author = "Darya Kurilova and Derek Rayside",
title = "On the simplicity of synthesizing linked data
structure operations",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "155--158",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517225",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We argue that synthesizing operations on recursive
linked data structures is not as hard as it appears and
is, in fact, within reach of current SAT-based
synthesis techniques --- with the addition of a simple
approach that we describe to decompose the problem into
smaller parts. Generating smaller pieces of code, i.e.,
shorter routines, is obviously easier than generating
large and complex ones, and there is also more
potential for automating the code synthesis. In this
paper, we present a code generation algorithm for
synthesizing operations of linked data structures and,
as an example, describe how the proposed algorithm
works to synthesize operations of an AVL tree.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Dhungana:2014:GCD,
author = "Deepak Dhungana and Andreas Falkner and Alois
Haselb{\"o}ck",
title = "Generation of conjoint domain models for
system-of-systems",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "159--168",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517224",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software solutions in complex environments, such as
railway control systems or power plants, are assemblies
of heterogeneous components, which are very large and
complex systems themselves. Interplay of these systems
requires a thorough design of a system-of-systems (SoS)
encompassing the required interactions between the
involved systems. One of the challenges lies in
the reconciliation of the domain data structures and
runtime constraints to ensure consistency of the SoS
behavior. In this paper, we present a generative
approach that enables reconciliation of a common
platform based on reusable domain models of the
involved systems. This is comparable to a product line
configuration problem where we generate a common
platform model for all involved systems. We discuss the
specific requirements for model composition in a SoS
context and address them in our approach. In
particular, our approach addresses the operational and
managerial independence of the individual systems and
offers appropriate modeling constructs. We report on
our experiences of applying the approach in several
real world projects and share the lessons learned.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Basso:2014:SLS,
author = "F{\'a}bio Paulo Basso and Raquel Mainardi Pillat and
Toacy Cavalcante Oliveira and Leandro Buss Becker",
title = "Supporting large scale model transformation reuse",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "169--178",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517218",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The growth of applications developed with the support
of model transformations makes reuse a required
practice, especially when applied to transformation
assets (e.g. transformation chains, algorithms, and
configuration files). In order to promote reuse one
must consider the different implementations,
commonalities, and variants among these assets. In this
domain, a couple of techniques have been used as
solutions
to adapt reusable assets for specific needs. However,
so far, no work has discussed their combined use in
real software projects. In this paper, we present a new
tool named WCT, which can be used to adapt
transformation assets. Moreover, through lessons
learned in industry, we address some reuse techniques
devoted to adapt these assets.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{An:2014:MDG,
author = "Kyoungho An and Takayuki Kuroda and Aniroddha Gokhale
and Sumant Tambe and Andrea Sorbini",
title = "Model-driven generative framework for automated {OMG
DDS} performance testing in the cloud",
journal = j-SIGPLAN,
volume = "49",
number = "3",
pages = "179--182",
month = mar,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2637365.2517216",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 26 05:58:25 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Object Management Group's (OMG) Data Distribution
Service (DDS) provides many configurable policies which
determine end-to-end quality of service (QoS) of
applications. It is challenging to predict the system's
performance in terms of latencies, throughput, and
resource usage because diverse combinations of QoS
configurations influence QoS of applications in
different ways. To overcome this problem, design-time
formal methods have been applied with mixed success,
but lack of sufficient accuracy in prediction, tool
support, and understanding of formalism has prevented
wider adoption of the formal techniques. A promising
approach to address this challenge is to emulate system
behavior and gather data on the QoS parameters of
interest by experimentation. To realize this approach,
which is preferred over formal methods due to their
limitations in accurately predicting QoS, we have
developed a model-based automatic performance testing
framework with generative capabilities to reduce manual
efforts in generating a large number of relevant QoS
configurations that can be deployed and tested on a
cloud platform. This paper describes our initial
efforts in developing and using this technology.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '13 conference proceedings.",
}
@Article{Vitek:2014:SCR,
author = "Jan Vitek",
title = "{SIGPLAN Chair}'s report",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "1--1",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641640",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gibbons:2014:SVC,
author = "Jeremy Gibbons",
title = "{SIGPLAN Vice-Chair}'s report",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "2--2",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641641",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Black:2014:SSR,
author = "Andrew Black",
title = "{SIGPLAN Secretary}'s report",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "3--3",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641642",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lopes:2014:STR,
author = "Cristina V. Lopes",
title = "{SIGPLAN Treasurer}'s report",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "4--4",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641643",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Dreyer:2014:SA,
author = "Derek Dreyer",
title = "{SIGPLAN} awards",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "5--7",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641644",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lawall:2014:SPA,
author = "Julia Lawall and Cristina V. Lopes",
title = "{SIGPLAN Professional Activities Committee Report}",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "8--8",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641645",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hind:2014:SRH,
author = "Michael Hind",
title = "{SIGPLAN Research Highlights Annual Report}",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "9--9",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641646",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Sewell:2014:PPC,
author = "Peter Sewell",
title = "{POPL 2014 Program Chair}'s report",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "10--26",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641647",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This note describes the POPL 2014 paper selection
process and its rationale.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Lopes:2014:OTP,
author = "Cristina V. Lopes",
title = "The {OOPSLA} two-phase review process",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "27--32",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641648",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Boehm:2014:PP,
author = "Hans Boehm and Jack Davidson and Kathleen Fisher and
Cormac Flanagan and Jeremy Gibbons and Mary Hall and
Graham Hutton and David Padua and Frank Tip and Jan
Vitek and Philip Wadler",
title = "Practices of {PLDI}",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "33--38",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641649",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Fahndrich:2014:SAS,
author = "Manuel F{\"a}hndrich and Francesco Logozzo",
title = "{SAS2013} artifact submission experience report",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "39--40",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641650",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Gay:2014:NLH,
author = "David Gay and Philip Levis and Robert von Behren and
Matt Welsh and Eric Brewer and David Culler",
title = "The {nesC} language: a holistic approach to networked
embedded systems",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "41--51",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641652",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present nesC, a programming language for networked
embedded systems that represent a new design space for
application developers. An example of a networked
embedded system is a sensor network, which consists of
(potentially) thousands of tiny, low-power ``motes,''
each of which executes concurrent, reactive programs
that must operate with severe memory and power
constraints. nesC's contribution is to support the
special needs of this domain by exposing a programming
model that incorporates event-driven execution, a
flexible concurrency model, and component-oriented
application design. Restrictions on the programming
model allow the nesC compiler to perform whole-program
analyses, including data-race detection (which improves
reliability) and aggressive function inlining (which
reduces resource consumption). nesC has been used to
implement TinyOS, a small operating system for sensor
networks, as well as several significant sensor
applications. nesC and TinyOS have been adopted by a
large number of sensor network research groups, and our
experience and evaluation of the language shows that it
is effective at supporting the complex, concurrent
programming style demanded by this new class of deeply
networked systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{LeBotlan:2014:MRM,
author = "Didier {Le Botlan} and Didier R{\'e}my",
title = "{MLF}: raising {ML} to the power of {System F}",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "52--63",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641653",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a type system MLF that generalizes ML with
first-class polymorphism as in System F. Expressions
may contain second-order type annotations. Every
typable expression admits a principal type, which
however depends on type annotations. Principal types
capture all other types that can be obtained by
implicit type instantiation and they can be inferred.
All expressions of ML are well-typed without any
annotations. All expressions of System F can be
mechanically encoded into MLF by dropping all type
abstractions and type applications, and injecting types
of lambda-abstractions into MLF types. Moreover, only
parameters of lambda-abstractions that are used
polymorphically need to remain annotated.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Harris:2014:LSL,
author = "Tim Harris and Keir Fraser",
title = "Language support for lightweight transactions",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "64--78",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641654",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent programming is notoriously difficult.
Current abstractions are intricate and make it hard to
design computer systems that are reliable and scalable.
We argue that these problems can be addressed by moving
to a declarative style of concurrency control in which
programmers directly indicate the safety properties
that they require. In our scheme the programmer demarks
sections of code which execute within lightweight
software-based transactions that commit atomically and
exactly once. These transactions can update shared
data, instantiate objects, invoke library features and
so on. They can also block, waiting for arbitrary
boolean conditions to become true. Transactions which
do not access the same shared memory locations can
commit concurrently. Furthermore, in general, no
performance penalty is incurred for memory accesses
outside transactions. We present a detailed design of
this proposal along with an implementation and
evaluation. We argue that the resulting system (i) is
easier for mainstream programmers to use, (ii) prevents
lock-based priority-inversion and deadlock problems,
and (iii) can offer performance advantages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Henzinger:2014:AP,
author = "Thomas A. Henzinger and Ranjit Jhala and Rupak
Majumdar and Kenneth L. McMillan",
title = "Abstractions from proofs",
journal = j-SIGPLAN,
volume = "49",
number = "4S",
pages = "79--91",
month = apr,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2641638.2641655",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:36:32 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The success of model checking for large programs
depends crucially on the ability to efficiently
construct parsimonious abstractions. A predicate
abstraction is parsimonious if at each control
location, it specifies only relationships between
current values of variables, and only those which are
required for proving correctness. Previous methods for
automatically refining predicate abstractions until
sufficient precision is obtained do not systematically
construct parsimonious abstractions: predicates usually
contain symbolic variables, and are added heuristically
and often uniformly to many or all control locations at
once. We use Craig interpolation to efficiently
construct, from a given abstract error trace which
cannot be concretized, a parsimonious abstraction that
removes the trace. At each location of the trace, we
infer the relevant predicates as an interpolant between
the two formulas that define the past and the future
segment of the trace. Each interpolant is a
relationship between current values of program
variables, and is relevant only at that particular
program location. It can be found by a linear scan of
the proof of infeasibility of the trace. We develop our
method for programs with arithmetic and pointer
expressions, and call-by-value function calls. For
function calls, Craig interpolation offers a systematic
way of generating relevant predicates that contain only
the local variables of the function and the values of
the formal parameters when the function was called. We
have extended our model checker BLAST with predicate
discovery by Craig interpolation, and applied it
successfully to C programs with more than 130,000 lines
of code, which was not possible with approaches that
build less parsimonious abstractions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Kulkarni:2014:EED,
author = "Prasad A. Kulkarni",
title = "Energy efficient data access techniques",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "1--1",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2602568",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy has become a first class design constraint for
all types of processors. Data accesses contribute to
processor energy usage and can account for up to 25\%
of the total energy used in embedded processors. Using
a set-associative level-one data cache (L1 DC)
organization is particularly energy inefficient as load
operations access all L1 DC tag and data arrays in
parallel to reduce access latency, but the data can
reside in at most one way. Techniques that reduce L1 DC
energy usage at the expense of degrading performance,
such as filter caches, have not been adopted. In this
presentation I will describe various techniques we have
developed to reduce the energy usage for L1 DC accesses
without adversely affecting performance. These
techniques include avoiding unnecessary loads from L1
DC data arrays and a practical data filter cache design
that not only significantly reduces data access energy
usage, but also avoids the traditional execution time
penalty associated with data filter caches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Spink:2014:ECG,
author = "Tom Spink and Harry Wagstaff and Bj{\"o}rn Franke and
Nigel Topham",
title = "Efficient code generation in a region-based dynamic
binary translator",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "3--12",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597810",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Region-based JIT compilation operates on translation
units comprising multiple basic blocks and possibly
cyclic or conditional control flow between them. It
promises to reconcile aggressive code optimisation and
low compilation latency in performance-critical dynamic
binary translators. Whilst various region selection
schemes and isolated code optimisation techniques have
been investigated, it remains unclear how best to
exploit such regions for efficient code generation.
Complex interactions with indirect branch tables and
translation caches can have adverse effects on
performance if not considered carefully. In this paper
we present a complete code generation strategy for a
region-based dynamic binary translator, which exploits
branch type and control flow profiling information to
improve code quality for the common case. We
demonstrate that using our code generation strategy a
competitive region-based dynamic compiler can be built
on top of the LLVM JIT compilation framework. For the
ARM-V5T target ISA and SPEC CPU 2006 benchmarks we
achieve execution rates of, on average, 867 MIPS and up
to 1323 MIPS on a standard X86 host machine,
outperforming state-of-the-art QEMU-ARM by delivering a
speedup of 264\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Lezuo:2014:COC,
author = "Roland Lezuo and Philipp Paulweber and Andreas Krall",
title = "{CASM}: optimized compilation of abstract state
machines",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "13--22",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597813",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper we present CASM, a language based on
Abstract State Machines (ASM), and its optimizing
compiler. ASM is a well-defined (formal) method based
on algebraic concepts. A distinct feature of ASM is its
combination of parallel and sequential execution
semantics. This makes it an excellent choice to
formally specify and verify micro-architectures. We
present a compilation scheme and an implementation of a
runtime system supporting efficient execution of ASM.
After introducing novel analysis techniques we present
optimizations allowing us to eliminate many costly
operations. Benchmark results show that our baseline
compiler is 2--3 orders of magnitude faster than other
ASM implementations. The optimizations further increase
the performance of the compiled programs by up to
264\%. The
achieved performance allows our ASM implementation to
be used with industry-size applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Lozano:2014:CSC,
author = "Roberto Casta{\~n}eda Lozano and Mats Carlsson and
Gabriel Hjort Blindell and Christian Schulte",
title = "Combinatorial spill code optimization and ultimate
coalescing",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "23--32",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597815",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a novel combinatorial model that
integrates global register allocation based on ultimate
coalescing, spill code optimization, register packing,
and multiple register banks with instruction scheduling
(including VLIW). The model exploits alternative
temporaries that hold the same value as a new concept
for ultimate coalescing and spill code optimization.
The paper presents Unison as a code generator based on
the model and advanced solving techniques using
constraint programming. Thorough experiments using
MediaBench and a processor (Hexagon) that are typical
of embedded systems demonstrate that Unison: is robust
and scalable; generates faster code than LLVM (up to
41\% with a mean improvement of 7\%); possibly
generates optimal code (for 29\% of the experiments);
effortlessly supports different optimization criteria
(code size on par with LLVM). Unison is significant as
it addresses the same aspects as traditional code
generation algorithms, yet is based on a simple
integrated model and can robustly generate optimal
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Ballabriga:2014:CRP,
author = "Cl{\'e}ment Ballabriga and Lee Kee Chong and Abhik
Roychoudhury",
title = "Cache-related preemption delay analysis for {FIFO}
caches",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "33--42",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597814",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hard real-time systems are typically composed of
multiple tasks, subjected to timing constraints. To
guarantee that these constraints will be respected, the
Worst-Case Response Time (WCRT) of each task is needed.
In the presence of systems supporting preemptible
tasks, we need to take into account the time lost due
to task preemption. A major part of this delay is the
Cache-Related Preemption Delay (CRPD), which represents
the penalties due to cache block evictions by
preempting tasks. Previous works on CRPD have focused
on caches with the Least Recently Used (LRU) replacement
policy. However, for many real-world processors such as
ARM9 or ARM11, the use of First-in-first-out (FIFO)
cache replacement policy is common. In this paper, we
propose an approach to compute CRPD in the presence of
instruction caches with FIFO replacement policy. We use
the result of a FIFO instruction cache categorization
analysis to account for single-task cache misses, and
we model as an Integer Linear Programming (ILP) system
the additional preemption-related cache misses. We
study the effect of cache-related timing anomalies; our
work is the first to deal with the effect of timing
anomalies in CRPD computation. We also present a WCRT
computation method that takes advantage of the fact
that our computed CRPD does not increase linearly with
respect to the preemption count. We evaluated our
method by computing the CRPD with realistic benchmarks
(e.g. drone control application, robot controller
application), under various cache configuration
parameters. The experimentation shows that our method
is able to compute tight CRPD bound for benchmark
tasks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Henry:2014:HCW,
author = "Julien Henry and Mihail Asavoae and David Monniaux and
Claire Ma{\"\i}za",
title = "How to compute worst-case execution time by
optimization modulo theory and a clever encoding of
program semantics",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "43--52",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597817",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In systems with hard real-time constraints, it is
necessary to compute upper bounds on the worst-case
execution time (WCET) of programs; the closer the bound
to the real WCET, the better. This is especially the
case of synchronous reactive control loops with a fixed
clock; the WCET of the loop body must not exceed the
clock period. We compute the WCET (or at least a close
upper bound thereof) as the solution of an optimization
modulo theory problem that takes into account the
semantics of the program, in contrast to other methods
that compute the longest path whether or not it is
feasible according to these semantics. Optimization
modulo theory extends satisfiability modulo theory
(SMT) to maximization problems. Immediate encodings of
WCET problems into SMT yield formulas intractable for
all current production-grade solvers --- this is
inherent to the DPLL(T) approach to SMT implemented in
these solvers. By conjoining some appropriate ``cuts''
to these formulas, we considerably reduce the
computation time of the SMT solver. We experimented
with our approach on a variety of control programs,
using the
OTAWA analyzer both as baseline and as underlying
microarchitectural analysis for our analysis, and show
notable improvement on the WCET bound on a variety of
benchmarks and control programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Zheng:2014:WAD,
author = "Wenguang Zheng and Hui Wu",
title = "{WCET}: aware dynamic instruction cache locking",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "53--62",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597820",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Caches are widely used in embedded systems to bridge
the increasing speed gap between processors and
off-chip memory. However, caches make it significantly
harder to compute the WCET (Worst-Case Execution Time)
of a task. To alleviate this problem, cache locking has
been proposed. We investigate the I-cache locking
problem, and propose a WCET-aware, min-cut based
dynamic instruction cache locking approach for reducing
the WCET of a single task. We have implemented our
approach and compared it with the two state-of-the-art
cache locking approaches by using a set of benchmarks
from the MRTC benchmark suite. The experimental results
show that our approach achieves average improvements
of 41\%, 15\% and 7\% over the partial
locking approach for the 256B, 512B and 1KB caches,
respectively, and 7\%, 18\% and 17\% over the longest
path based dynamic locking approach for the 256B, 512B
and 1KB caches, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Martins:2014:ECO,
author = "Luiz G. A. Martins and Ricardo Nobre and Alexandre C.
B. Delbem and Eduardo Marques and Jo{\~a}o M. P.
Cardoso",
title = "Exploration of compiler optimization sequences using
clustering-based selection",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "63--72",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597821",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Due to the large number of optimizations provided in
modern compilers and to compiler optimization specific
opportunities, a Design Space Exploration (DSE) is
necessary to search for the best sequence of compiler
optimizations for a given code fragment (e.g.,
function). As this exploration is a complex and time
consuming task, in this paper we present DSE strategies
to select optimization sequences to both improve the
performance of each function and reduce the exploration
time. The DSE is based on a clustering approach which
groups functions with similarities and then explores the
reduced search space provided by the optimizations
previously suggested for the functions in each group.
The identification of similarities between functions
uses a data mining method which is applied to a
symbolic code representation of the source code. The
DSE process uses the reduced set identified by
clustering in two ways: as the design space or as the
initial configuration. In both ways, the adoption of a
pre-selection based on clustering allows the use of
simple and fast DSE algorithms. Our experiments for
evaluating the effectiveness of the proposed approach
address the exploration of compiler optimization
sequences considering 49 compilation passes and
targeting a Xilinx MicroBlaze processor, and were
performed aiming performance improvements for 41
functions. Experimental results reveal that the use of
our new clustering-based DSE approach achieved a
significant reduction on the total exploration time of
the search space (18x over a Genetic Algorithm approach
for DSE), while at the same time important performance
speedups (43\% over the baseline) were obtained by the
optimized codes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Chandramohan:2014:PDP,
author = "Kiran Chandramohan and Michael F. P. O'Boyle",
title = "Partitioning data-parallel programs for heterogeneous
{MPSoCs}: time and energy design space exploration",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "73--82",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597822",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multiprocessor System-on-Chips(MPSoCs) are now widely
used in embedded devices. MPSoCs typically contain a
range of specialised processors. Alongside the CPU,
there are microcontrollers, DSPs and other hardware
accelerators. Programming these MPSoCs is difficult
because of the difference in instruction-set
architecture (ISA) and disjoint address spaces. In this
paper we consider MPSoCs as a target for individual
benchmarks. We examine how data-parallel programs can
be optimally mapped to heterogeneous multicores for
different criteria such as performance, power and
energy. We investigate the partitioning of seven
benchmarks taken from DSPstone, UTDSP and Polybench
suites. Based on design space exploration we show that
the best partition depends on compiler optimization
level, program, input size and, crucially, the
optimization
criteria. We develop a straightforward approach that
attempts to select the best partitioning for a given
program. On average it achieves speedups of 2.2x and
energy improvements of 1.45x on the OMAP 4430
platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Guo:2014:EED,
author = "Minyi Guo",
title = "Energy efficient data access and storage through
{HW\slash SW} co-design",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "83--83",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2602569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Massive energy consumption has become a major factor
for the design and implementation of datacenters. This
has led to numerous academic and industrial efforts to
improve the energy efficiency of datacenter
infrastructures. As a result, in state-of-the-art
datacenter facilities, over 80\% of power is now
consumed by servers themselves. Historically, the
processor has dominated energy consumption in the
server. However, as processors have become more energy
efficient, their contribution has been decreasing. In
contrast, the energy consumed by data accesses and
storage is growing, since multi- and many-core servers
require increased main memory bandwidth and capacity,
large register files, and large-scale storage systems.
Accordingly, the energy consumed by data accesses and
storage is approaching or even surpassing that
consumed by processors in many servers. For example, it has been
reported that main memory contributes to as much as
40-46\% of total energy consumption in server
applications. In this talk, we present our continuing
efforts to improve the energy efficiency of data
accesses and storage. We study a series of approaches
based on hardware-software cooperation to reduce the
energy consumption of on-chip memory, register files,
main memory, and storage devices for embedded systems
and multi- and many-core servers. Experiments
with a large set of workloads show the accuracy of our
analytical models and the effectiveness of our
optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{vonKoch:2014:EFS,
author = "Tobias J. K. Edler von Koch and Bj{\"o}rn Franke and
Pranav Bhandarkar and Anshuman Dasgupta",
title = "Exploiting function similarity for code size
reduction",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "85--94",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597811",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For cost-sensitive or memory constrained embedded
systems, code size is at least as important as
performance. Consequently, compact code generation has
become a major focus of attention within the compiler
community. In this paper we develop a pragmatic, yet
effective code size reduction technique, which exploits
structural similarity of functions. It avoids code
duplication through merging of similar functions and
targeted insertion of control flow to resolve small
differences. We have implemented our purely
software-based and platform-independent technique in
the LLVM compiler framework and evaluated it against
the SPEC CPU2006 benchmarks on three target platforms:
Intel x86, ARM-based Qualcomm Krait(TM), and Qualcomm
Hexagon(TM) DSP. We demonstrate that code size for SPEC
CPU2006 can be reduced by more than 550KB on x86. This
corresponds to an overall code size reduction of 4\%,
and up to 11.5\% for individual programs. Overhead
introduced by additional control flow is compensated
for by better I-cache performance of the compacted
programs. We also show that identifying suitable
candidates and subsequent merging of functions can be
implemented efficiently.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Roy:2014:AAS,
author = "Pooja Roy and Rajarshi Ray and Chundong Wang and Weng
Fai Wong",
title = "{ASAC}: automatic sensitivity analysis for approximate
computing",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "95--104",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597812",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The approximation based programming paradigm is
especially attractive for developing error-resilient
applications, targeting low power embedded devices. It
allows for program data to be computed and stored
approximately for better energy efficiency. Battery
life in smartphones, tablets, etc., is generally more
of a concern to users than an application's accuracy
or fidelity beyond a certain acceptable quality of
service. Therefore, relaxing
accuracy to improve energy efficiency is an attractive
trade-off when permissible by the application's domain.
Recent works suggest source code annotations and type
qualifiers to facilitate safe approximate computation
and data manipulation. However, this requires
rewriting programs or having source code available for
annotation, which may not be feasible, as real-world
applications tend to be large, with source code that is
not readily available. In this paper, we propose a
novel sensitivity analysis that automatically generates
annotations for programs for the purpose of approximate
computing. Our framework, ASAC, extracts information
about the sensitivity of the output with respect to
program data. We show that the program output is
sensitive to only a subset of program data that we deem
critical, and hence must be precise. The rest of the
data can be computed and stored approximately. We
evaluated our analysis on a range of applications and
achieved an 86\% accuracy compared to manual annotations
by programmers. We validated our analysis by showing
that the applications are within the acceptable QoS
threshold if we approximate the non-critical data.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Chaudhary:2014:ESC,
author = "Sandeep Chaudhary and Sebastian Fischmeister and Lin
Tan",
title = "{em-SPADE}: a compiler extension for checking rules
extracted from processor specifications",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "105--114",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597823",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Traditional compilers ignore processor specifications,
thousands of pages of which are available for modern
processors. To bridge this gap, em-SPADE analyzes
processor specifications and creates processor-specific
rules to reduce low-level programming errors. This work
shows the potential of automatically analyzing
processor- and other hardware specifications to detect
low-level programming errors at compile time. em-SPADE
is a compiler extension to automatically detect
software bugs in low-level programs. From processor
specifications, a preprocessor extracts target-specific
rules such as register use and read-only or reserved
registers. A special LLVM pass then uses these rules to
detect incorrect register assignments. In our
experiments, em-SPADE correctly extracted 652 rules
from 15 specifications and consequently found 20 bugs in ten
software projects. The work is generalizable to other
types of specifications and shows the clear prospects
of using hardware specifications to enhance
compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Beaugnon:2014:VVO,
author = "Ulysse Beaugnon and Alexey Kravets and Sven van
Haastregt and Riyadh Baghdadi and David Tweed and Javed
Absar and Anton Lokhmotov",
title = "{VOBLA}: a vehicle for optimized basic linear
algebra",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "115--124",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597818",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present VOBLA, a domain-specific language designed
for programming linear algebra libraries. VOBLA is
compiled to PENCIL, a domain-independent intermediate
language designed for efficient mapping to accelerator
architectures such as GPGPUs. PENCIL is compiled to
efficient, platform-specific OpenCL code using
techniques based on the polyhedral model. This approach
addresses both the programmer productivity and
performance portability concerns associated with
accelerator programming. We demonstrate our approach by
using VOBLA to implement a BLAS library. We have
evaluated the performance of OpenCL code generated
using our compilation flow on ARM Mali, AMD Radeon, and
AMD Opteron platforms. The generated code is currently
on average 1.9x slower than highly hand-optimized
OpenCL code, but on average 8.1x faster than
straightforward OpenCL code. Given that coding in
VOBLA takes significantly less effort than
hand-optimizing OpenCL code, we believe our approach
leads to improved productivity and performance
portability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Bebelis:2014:FSP,
author = "Vagelis Bebelis and Pascal Fradet and Alain Girault",
title = "A framework to schedule parametric dataflow
applications on many-core platforms",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "125--134",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597819",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dataflow models, such as SDF, have been effectively
used to program streaming applications while ensuring
their liveness and boundedness. Yet, industry is
struggling to design the next generation of
high-definition video applications using these models. Such
applications demand new features such as parameters to
express dynamic input/output rate and topology
modifications. Their implementation on modern many-core
platforms is a major challenge. We tackle these
problems by proposing a generic and flexible framework
to schedule streaming applications designed in a
parametric dataflow model of computation. We generate
parallel as soon as possible (ASAP) schedules targeted
to the new STHORM many-core platform of
STMicroelectronics. Furthermore, these schedules can be
customized using user-defined ordering and resource
constraints. The parametric dataflow graph is
associated with generic or user-defined specific
constraints aimed at minimizing timing, buffer sizes,
power consumption, or other criteria. The scheduling
algorithm executes with minimal overhead and can be
adapted to different scheduling policies just by adding
some constraints. The safety of both the dataflow graph
and constraints can be checked statically and all
schedules are guaranteed to be bounded and deadlock
free. We illustrate the scheduling capabilities of our
approach using a real-world application: the VC-1 video
decoder for high-definition video streaming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Lee:2014:IPL,
author = "Jinyong Lee and Jongwon Lee and Jongeun Lee and
Yunheung Paek",
title = "Improving performance of loops on {DIAM-based} {VLIW}
architectures",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "135--144",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597825",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent studies show that very long instruction word
(VLIW) architectures, which inherently have a wide
datapath (e.g., 128 or 256 bits for one VLIW instruction
word), can benefit from dynamic implied addressing mode
(DIAM) and can achieve lower power consumption and
smaller code size with a small performance overhead.
Such overhead, which is claimed to be small, is mainly
caused by the execution of additionally generated
special instructions for conveying information that
cannot be encoded in reduced instruction bit-width. In
this paper, however, we show that the performance
impact of applying DIAM on VLIW architecture cannot be
overlooked expecially when applications possess high
level of instruction level parallelism (ILP), which is
mostly the case for loops because of the result of
aggressive code scheduling. We also propose a way to
relieve the performance degradation especially focusing
on loops since loops spend almost 90\% of total
execution time in programs and tend to have high ILP.
We first implement the original DIAM compilation
technique in a compiler, and augment it with the
proposed loop optimization scheme to show that ours can
clearly alleviate the performance loss caused by the
excessive number of additional instructions, with the
help of slightly modified hardware. Moreover, the
well-known loop unrolling scheme, which would produce
denser code in loops at the cost of substantial code
size bloating, is integrated into our compiler. The
experimental results show that the loop unrolling
technique, combined with our augmented DIAM scheme,
produces far better code in terms of performance with
quite an acceptable amount of code size increase.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Wingbermuehle:2014:SMS,
author = "Joseph G. Wingbermuehle and Ron K. Cytron and Roger D.
Chamberlain",
title = "Superoptimization of memory subsystems",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "145--154",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597816",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The disparity in performance between processors and
main memories has led computer architects to
incorporate large cache hierarchies in modern
computers. Because these cache hierarchies are designed
to be general-purpose, they may not provide the best
possible performance for a given application. In this
paper, we determine a memory subsystem well suited to
a given application and main memory by discovering a
subsystem composed of caches, scratchpads, and other
components that are combined to provide better
performance. We draw motivation from the
superoptimization of instruction sequences, which
successfully finds unusually clever instruction
sequences for programs. Targeting both ASIC and FPGA
devices, we show that it is possible to discover
unusual memory subsystems that provide performance
improvements over a typical memory subsystem.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Kim:2014:LBL,
author = "Hongjune Kim and Seonmyeong Bak and Jaejin Lee",
title = "Lightweight and block-level concurrent sweeping for
{JavaScript} garbage collection",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "155--164",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597824",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "JavaScript is a dynamic-typed language originally
developed for the purpose of giving dynamic client-side
behaviors to web pages. It is mainly used in web
application development, and because of its popularity
and rapid development style it is now also used in
other types of applications. Increasing data-processing
requirements and growing usage in more resource-limited
environments, such as mobile devices, have created
demand for JavaScript implementations that handle
memory more efficiently through garbage collection. Since
aggressive use of time-consuming operations in garbage
collection can slow down the JavaScript application,
there is a trade-off between the effectiveness and the
execution time of garbage collection. In this paper,
we present a lightweight,
block-level concurrent sweeping mechanism for a
mark-and-sweep garbage collector. The sweeping process
is offloaded to an additional thread that eagerly
collects free memory blocks and recycles them. To
minimize the overhead of synchronization between the
mutator thread and the new sweeping thread, we have
chosen a coarse-grained, block-level collecting scheme
for sweeping. To avoid contention arising from
object destruction, we execute the object destruction
phase concurrently with the foreground marking phase.
We have implemented our algorithm in the
JavaScriptCore (JSC) engine embedded in the WebKit
browser, which uses a variant of the mark-and-sweep
algorithm to manage JavaScript objects. The original garbage collection
implementation performs lazy sweeping that cannot reuse
the free blocks. We evaluate our implementation on an
ARM-based mobile system and show that memory
utilization of the system is significantly improved
without performance degradation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '14 conference proceedings.",
}
@Article{Padua:2014:WEI,
author = "David Padua",
title = "What exactly is inexact computation good for?",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "1--1",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2604001",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Our willingness to deliberately trade accuracy of
computing systems for significant resource savings,
notably energy consumption, got a boost from two
directions. First, energy (or power, the more popularly
used measure) consumption started emerging as a serious
hurdle to our ability to continue scaling the
complexity of processors, and thus enable ever richer
computing applications. This ``energy hurdle'' spanned
the gamut from large data-centers to portable embedded
computing systems. Second, many believed that an engine
of growth that supported scaling, captured by Gordon
Moore's remarkable prophecy (Moore's law), was headed
towards an irrevocable cliff edge --- when this
happens, our ability to produce computing systems whose
hardware would support precise or exact computing would
diminish greatly. In this talk, which emphasizes the
physical and hardware layers of abstraction where all
of these troubles start (after all, energy is rooted in
thermodynamics), I will first review the reasons that
compelled and encouraged us to consider deliberately
trading accuracy for energy savings, resulting in
inexact computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Kuper:2014:TPE,
author = "Lindsey Kuper and Aaron Todd and Sam Tobin-Hochstadt
and Ryan R. Newton",
title = "Taming the parallel effect zoo: extensible
deterministic parallelism with {LVish}",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "2--14",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594312",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A fundamental challenge of parallel programming is to
ensure that the observable outcome of a program remains
deterministic in spite of parallel execution.
Language-level enforcement of determinism is possible,
but existing deterministic-by-construction parallel
programming models tend to lack features that would
make them applicable to a broad range of problems.
Moreover, they lack extensibility: it is difficult to
add or change language features without breaking the
determinism guarantee. The recently proposed LVars
programming model, and the accompanying LVish Haskell
library, took a step toward broadly-applicable
guaranteed-deterministic parallel programming. The
LVars model allows communication through shared
monotonic data structures to which information can only
be added, never removed, and for which the order in
which information is added is not observable. LVish
provides a Par monad for parallel computation that
encapsulates determinism-preserving effects while
allowing a more flexible form of communication between
parallel tasks than previous guaranteed-deterministic
models provided. While applying LVar-based programming
to real problems using LVish, we have identified and
implemented three capabilities that extend its reach:
inflationary updates other than least-upper-bound
writes; transitive task cancellation; and parallel
mutation of non-overlapping memory locations. The
unifying abstraction we use to add these capabilities
to LVish---without suffering added complexity or cost
in the core LVish implementation, or compromising
determinism---is a form of monad transformer, extended
to handle the Par monad. With our extensions, LVish
provides the most broadly applicable
guaranteed-deterministic parallel programming interface
available to date. We demonstrate the viability of our
approach both with traditional parallel benchmarks and
with results from a real-world case study: a
bioinformatics application that we parallelized using
our extended version of LVish.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Surendran:2014:TDR,
author = "Rishi Surendran and Raghavan Raman and Swarat
Chaudhuri and John Mellor-Crummey and Vivek Sarkar",
title = "Test-driven repair of data races in structured
parallel programs",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "15--25",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594335",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A common workflow for developing parallel software is
as follows: (1) start with a sequential program, (2)
identify subcomputations that should be converted to
parallel tasks, (3) insert synchronization to achieve
the same semantics as the sequential program, and
repeat steps (2) and (3) as needed to improve
performance. Though this is not the only approach to
developing parallel software, it is sufficiently common
to warrant special attention as parallel programming
becomes ubiquitous. This paper focuses on automating
step (3), which is usually the hardest step for
developers who lack expertise in parallel programming.
Past solutions to the problem of repairing parallel
programs have used static-only or dynamic-only
approaches, both of which incur significant limitations
in practice. Static approaches can guarantee soundness
in many cases but are limited in precision when
analyzing medium or large-scale software with accesses
to pointer-based data structures in multiple
procedures. Dynamic approaches are more precise, but
their proposed repairs are limited to a single input
and are not reflected back in the original source
program. In this paper, we introduce a hybrid
static+dynamic test-driven approach to repairing data
races in structured parallel programs. Our approach
includes a novel coupling between static and dynamic
analyses. First, we execute the program on a concrete
test input and determine the set of data races for this
input dynamically. Next, we compute a set of ``finish''
placements that prevent these races and also respect
the static scoping rules of the program while
maximizing parallelism. Empirical results on standard
benchmarks and student homework submissions from a
parallel computing course establish the effectiveness
of our approach with respect to compile-time overhead,
precision, and performance of the repaired code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Stork:2014:APB,
author = "Sven Stork and Karl Naden and Joshua Sunshine and
Manuel Mohr and Alcides Fonseca and Paulo Marques and
Jonathan Aldrich",
title = "{{\AE}minium}: a permission based
concurrent-by-default programming language approach",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "26--26",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594344",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The aim of {\AE}MINIUM is to study the implications of
having a concurrent-by-default programming language.
This includes language design, runtime system,
performance and software engineering considerations. We
conduct our study through the design of the
concurrent-by-default {\AE}MINIUM programming language.
{\AE}MINIUM leverages the permission flow of object and
group permissions through the program to validate the
program's correctness and to automatically infer a
possible parallelization strategy via a dataflow graph.
{\AE}MINIUM supports not only fork-join parallelism but
more general dataflow patterns of parallelism. In this
paper we present a formal system, called \mu
{\AE}MINIUM, modeling the core concepts of {\AE}MINIUM.
\mu {\AE}MINIUM's static type system is based on
Featherweight Java with {\AE}MINIUM-specific
extensions. Besides checking for correctness,
{\AE}MINIUM's type system also uses the permission
flow to compute a potential parallel execution
strategy for the program. \mu {\AE}MINIUM's dynamic semantics
use a concurrent-by-default evaluation approach. Along
with the formal system we present its soundness proof.
We provide a full description of the implementation
along with the description of various optimization
techniques we used. We implemented {\AE}MINIUM as an
extension of the Plaid programming language, which has
first-class support for permissions built-in. The
{\AE}MINIUM implementation and all case studies are
publicly available under the General Public License. We
use various case studies to evaluate {\AE}MINIUM's
applicability and to demonstrate that {\AE}MINIUM
parallelized code has performance improvements compared
to its sequential counterpart. We chose to use case
studies from common domains or problems that are known
to benefit from parallelization, to show that
{\AE}MINIUM is powerful enough to encode them. We
demonstrate through a webserver application, which
evaluates {\AE}MINIUM's impact on latency-bound
applications, that {\AE}MINIUM can achieve a 70\%
performance improvement over the sequential
counterpart. In another case study we chose to
implement a dictionary function to evaluate
{\AE}MINIUM's capabilities to express essential data
structures. Our evaluation demonstrates that
{\AE}MINIUM can be used to express parallelism in such
data structures and that the performance benefits
scale with the amount of annotation effort put into
the implementation. We chose an integral computation
example to evaluate pure functional programming and
computationally intensive use cases. Our
experiments show that {\AE}MINIUM is capable of
extracting parallelism from functional code and
achieving performance improvements up to the limits of
Plaid's inherent performance bounds. Overall, we hope
that the work helps to advance concurrent programming
in modern programming environments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Jagannathan:2014:ARV,
author = "Suresh Jagannathan and Vincent Laporte and Gustavo
Petri and David Pichardie and Jan Vitek",
title = "Atomicity refinement for verified compilation",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "27--27",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594346",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We consider the verified compilation of high-level
managed languages like Java or C\# whose intermediate
representations provide support for shared-memory
synchronization and automatic memory management. In
this environment, the interactions between application
threads and the language runtime (e.g., the garbage
collector) are regulated by compiler-injected code
snippets. Examples of such snippets include allocation
fast paths, among others. In our TOPLAS paper, we propose a
refinement-based proof methodology that precisely
relates concurrent code expressed at different
abstraction levels, cognizant throughout of the relaxed
memory semantics of the underlying processor. Our
technique allows the compiler writer to reason
compositionally about the atomicity of low-level
concurrent code used to implement managed services. We
illustrate our approach with examples taken from the
verification of a concurrent garbage collector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Biswas:2014:DES,
author = "Swarnendu Biswas and Jipeng Huang and Aritra Sengupta
and Michael D. Bond",
title = "{DoubleChecker}: efficient sound and precise atomicity
checking",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "28--39",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594323",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Atomicity is a key correctness property that allows
programmers to reason about code regions in isolation.
However, programs often fail to enforce atomicity
correctly, leading to atomicity violations that are
difficult to detect. Dynamic program analysis can
detect atomicity violations based on an atomicity
specification, but existing approaches slow programs
substantially. This paper presents DoubleChecker, a
novel sound and precise atomicity checker whose key
insight lies in its use of two new cooperating dynamic
analyses. Its imprecise analysis tracks cross-thread
dependences soundly but imprecisely with significantly
better performance than a fully precise analysis. Its
precise analysis is more expensive but only needs to
process a subset of the execution identified as
potentially involved in atomicity violations by the
imprecise analysis. If DoubleChecker operates in
single-run mode, the two analyses execute in the same
program run, which guarantees soundness and precision
but requires logging program accesses to pass from the
imprecise to the precise analysis. In multi-run mode,
the first program run executes only the imprecise
analysis, and a second run executes both analyses.
Multi-run mode trades accuracy for performance; each
run of multi-run mode outperforms single-run mode, but
can potentially miss violations. We have implemented
DoubleChecker and an existing state-of-the-art
atomicity checker called Velodrome in a
high-performance Java virtual machine. DoubleChecker's
single-run mode significantly outperforms Velodrome,
while still providing full soundness and precision.
DoubleChecker's multi-run mode improves performance
further, without significantly impacting soundness in
practice. These results suggest that DoubleChecker's
approach is a promising direction for improving the
performance of dynamic atomicity checking over prior
work.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Alglave:2014:HCM,
author = "Jade Alglave and Luc Maranget and Michael Tautschnig",
title = "Herding cats: modelling, simulation, testing, and
data-mining for weak memory",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "40--40",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594347",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There is a joke where a physicist and a mathematician
are asked to herd cats. The physicist starts with an
infinitely large pen which he reduces until it is of
reasonable diameter yet contains all the cats. The
mathematician builds a fence around himself and
declares the outside to be the inside. Defining memory
models is akin to herding cats: both the physicist's
and the mathematician's attitudes are tempting, but neither can
go without the other.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Rompf:2014:SPJ,
author = "Tiark Rompf and Arvind K. Sujeeth and Kevin J. Brown
and HyoukJoong Lee and Hassan Chafi and Kunle
Olukotun",
title = "Surgical precision {JIT} compilers",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "41--52",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594316",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Just-in-time (JIT) compilation of running programs
provides more optimization opportunities than offline
compilation. Modern JIT compilers, such as those in
virtual machines like Oracle's HotSpot for Java or
Google's V8 for JavaScript, rely on dynamic profiling
as their key mechanism to guide optimizations. While
these JIT compilers offer good average performance,
their behavior is a black box and the achieved
performance is highly unpredictable. In this paper, we
propose to turn JIT compilation into a precision tool
by adding two essential and generic metaprogramming
facilities: First, allow programs to invoke JIT
compilation explicitly. This enables controlled
specialization of arbitrary code at run-time, in the
style of partial evaluation. It also enables the JIT
compiler to report warnings and errors to the program
when it is unable to compile a code path in the
demanded way. Second, allow the JIT compiler to call
back into the program to perform compile-time
computation. This lets the program itself define the
translation strategy for certain constructs on the fly
and gives rise to a powerful JIT macro facility that
enables ``smart'' libraries to supply domain-specific
compiler optimizations or safety checks. We present
Lancet, a JIT compiler framework for Java bytecode that
enables such a tight, two-way integration with the
running program. Lancet itself was derived from a
high-level Java bytecode interpreter: staging the
interpreter using LMS (Lightweight Modular Staging)
produced a simple bytecode compiler. Adding abstract
interpretation turned the simple compiler into an
optimizing compiler. This fact provides compelling
evidence for the scalability of the staged-interpreter
approach to compiler construction. In the case of
Lancet, JIT macros also provide a natural interface to
existing LMS-based toolchains such as the Delite
parallelism and DSL framework, which can now serve as
accelerator macros for arbitrary JVM bytecode.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Schkufza:2014:SOF,
author = "Eric Schkufza and Rahul Sharma and Alex Aiken",
title = "Stochastic optimization of floating-point programs
with tunable precision",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "53--64",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594302",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The aggressive optimization of floating-point
computations is an important problem in
high-performance computing. Unfortunately,
floating-point instruction sets have complicated
semantics that often force compilers to preserve
programs as written. We present a method that treats
floating-point optimization as a stochastic search
problem. We demonstrate the ability to generate reduced
precision implementations of Intel's handwritten C
numeric library which are up to 6 times faster than the
original code, and achieve end-to-end speedups of over
30\% on a direct numeric simulation and a ray tracer by
optimizing kernels that can tolerate a loss of
precision while still remaining correct. Because these
optimizations are mostly not amenable to formal
verification using the current state of the art, we
present a stochastic search technique for
characterizing maximum error. The technique comes with
an asymptotic guarantee and provides strong evidence of
correctness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Stock:2014:FED,
author = "Kevin Stock and Martin Kong and Tobias Grosser and
Louis-No{\"e}l Pouchet and Fabrice Rastello and J.
Ramanujam and P. Sadayappan",
title = "A framework for enhancing data reuse via associative
reordering",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "65--76",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594342",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The freedom to reorder computations involving
associative operators has been widely recognized and
exploited in designing parallel algorithms and to a
more limited extent in optimizing compilers. In this
paper, we develop a novel framework utilizing the
associativity and commutativity of operations in
regular loop computations to enhance register reuse.
Stencils represent a particular class of important
computations where the optimization framework can be
applied to enhance performance. We show how stencil
operations can be implemented to better exploit
register reuse and reduce load/stores. We develop a
multi-dimensional retiming formalism to characterize
the space of valid implementations in conjunction with
other program transformations. Experimental results
demonstrate the effectiveness of the framework on a
collection of high-order stencils.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{DeVito:2014:FCR,
author = "Zachary DeVito and Daniel Ritchie and Matt Fisher and
Alex Aiken and Pat Hanrahan",
title = "First-class runtime generation of high-performance
types using exotypes",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "77--88",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594307",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce exotypes, user-defined types that combine
the flexibility of meta-object protocols in
dynamically-typed languages with the performance
control of low-level languages. Like objects in dynamic
languages, exotypes are defined programmatically at
run-time, allowing behavior based on external data such
as a database schema. To achieve high performance, we
use staged programming to define the behavior of an
exotype during a runtime compilation step and implement
exotypes in Terra, a low-level staged programming
language. We show how exotype constructors compose, and
use exotypes to implement high-performance libraries
for serialization, dynamic assembly, automatic
differentiation, and probabilistic programming. Each
exotype achieves expressiveness similar to libraries
written in dynamically-typed languages but implements
optimizations that exceed the performance of existing
libraries written in low-level statically-typed
languages. Though each implementation is significantly
shorter, our serialization library is 11 times faster
than Kryo, and our dynamic assembler is 3--20 times
faster than Google's Chrome assembler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Greenman:2014:GFB,
author = "Ben Greenman and Fabian Muehlboeck and Ross Tate",
title = "Getting {F}-bounded polymorphism into shape",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "89--99",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594308",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a way to restrict recursive inheritance
without sacrificing the benefits of F-bounded
polymorphism. In particular, we distinguish two new
concepts, materials and shapes, and demonstrate through
a survey of 13.5 million lines of open-source
generic-Java code that these two concepts never
actually overlap in practice. With this Material-Shape
Separation, we prove that even na{\"\i}ve type-checking
algorithms are sound and complete, some of which
address problems that were unsolvable even under the
existing proposals for restricting inheritance. We
illustrate how the simplicity of our design reflects
the design intuitions employed by programmers and
potentially enables new features coming into demand for
upcoming programming languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Simon:2014:OIF,
author = "Axel Simon",
title = "Optimal inference of fields in row-polymorphic
records",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "100--111",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594313",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Flexible records are a powerful concept in type
systems that form the basis of, for instance, objects
in dynamically typed languages. One caveat of using
flexible records is that a program may try to access a
record field that does not exist. We present a type
inference algorithm that checks for these runtime
errors. The novelty of our algorithm is that it
satisfies a clear notion of completeness: The inferred
types are optimal in the sense that type annotations
cannot increase the set of typeable programs. Under
certain assumptions, our algorithm guarantees the
following stronger property: it rejects a program if
and only if it contains a path from an empty record to
a field access on which the field has not been added.
We derive this optimal algorithm by abstracting a
semantics to types. The derived inference rules use a
novel combination of type terms and Boolean functions
that retains the simplicity of unification-based type
inference but adds the ability of Boolean functions to
express implications, thereby addressing the challenge
of combining implications and types. By following our
derivation method, we show how various operations such
as record concatenation and branching on whether a field exists
lead to Boolean satisfiability problems of different
complexity. Analogously, we show that more expressive
type systems give rise to SMT problems. On the
practical side, we present an implementation of the
select and update operations and give practical
evidence that these are sufficient in real-world
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Sampson:2014:EVP,
author = "Adrian Sampson and Pavel Panchekha and Todd Mytkowicz
and Kathryn S. McKinley and Dan Grossman and Luis
Ceze",
title = "Expressing and verifying probabilistic assertions",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "112--122",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594294",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Traditional assertions express correctness properties
that must hold on every program execution. However,
many applications have probabilistic outcomes and
consequently their correctness properties are also
probabilistic (e.g., they identify faces in images,
consume sensor data, or run on unreliable hardware).
Traditional assertions do not capture these correctness
properties. This paper proposes that programmers
express probabilistic correctness properties with
probabilistic assertions and describes a new
probabilistic evaluation approach to efficiently verify
these assertions. Probabilistic assertions are Boolean
expressions that express the probability that a
property will be true in a given execution rather than
asserting that the property must always be true. Given
either specific inputs or distributions on the input
space, probabilistic evaluation verifies probabilistic
assertions by first performing distribution extraction
to represent the program as a Bayesian network.
Probabilistic evaluation then uses statistical
properties to simplify this representation to
efficiently compute assertion probabilities directly or
with sampling. Our approach is a mix of both static and
dynamic analysis: distribution extraction statically
builds and optimizes the Bayesian network
representation and sampling dynamically interprets this
representation. We implement our approach in a tool
called Mayhap for C and C++ programs. We evaluate
expressiveness, correctness, and performance of Mayhap
on programs that use sensors, perform approximate
computation, and obfuscate data for privacy. Our case
studies demonstrate that probabilistic assertions
describe useful correctness properties and that Mayhap
efficiently verifies them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Borges:2014:CSS,
author = "Mateus Borges and Antonio Filieri and Marcelo d'Amorim
and Corina S. Pasareanu and Willem Visser",
title = "Compositional solution space quantification for
probabilistic software analysis",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "123--132",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594329",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Probabilistic software analysis aims at quantifying
how likely a target event is to occur during program
execution. Current approaches rely on symbolic
execution to identify the conditions to reach the
target event and try to quantify the fraction of the
input domain satisfying these conditions. Precise
quantification is usually limited to linear
constraints, while only approximate solutions can be
provided in general through statistical approaches.
However, statistical approaches may fail to converge to
an acceptable accuracy within a reasonable time. We
present a compositional statistical approach for the
efficient quantification of solution spaces for
arbitrarily complex constraints over bounded
floating-point domains. The approach leverages interval
constraint propagation to improve the accuracy of the
estimation by focusing the sampling on the regions of
the input domain containing the sought solutions.
Preliminary experiments show significant improvement on
previous approaches both in results accuracy and
analysis time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Hur:2014:SPP,
author = "Chung-Kil Hur and Aditya V. Nori and Sriram K.
Rajamani and Selva Samuel",
title = "Slicing probabilistic programs",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "133--144",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594303",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Probabilistic programs use familiar notation of
programming languages to specify probabilistic models.
Suppose we are interested in estimating the
distribution of the return expression r of a
probabilistic program P. We are interested in slicing
the probabilistic program P and obtaining a simpler
program Sli( P ) which retains only those parts of P
that are relevant to estimating r, and elides those
parts of P that are not relevant to estimating r. We
desire that the Sli transformation be both correct and
efficient. By correct, we mean that P and Sli( P ) have
identical estimates on r. By efficient, we mean that
estimation over Sli( P ) be as fast as possible. We
show that the usual notion of program slicing, which
traverses control and data dependencies backward from
the return expression r, is unsatisfactory for
probabilistic programs, since it produces incorrect
slices on some programs and sub-optimal ones on others.
Our key insight is that in addition to the usual
notions of control dependence and data dependence that
are used to slice non-probabilistic programs, a new
kind of dependence called observe dependence arises
naturally due to observe statements in probabilistic
programs. We propose a new definition of Sli( P ) which
is both correct and efficient for probabilistic
programs, by including observe dependence in addition
to control and data dependences for computing slices.
We prove correctness mathematically, and we demonstrate
efficiency empirically. We show that by applying the
Sli transformation as a pre-pass, we can improve the
efficiency of probabilistic inference, not only in our
own inference tool R2, but also in other systems for
performing inference such as Church and Infer.NET.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Cai:2014:TCH,
author = "Yufei Cai and Paolo G. Giarrusso and Tillmann Rendel
and Klaus Ostermann",
title = "A theory of changes for higher-order languages:
incrementalizing $ \lambda $-calculi by static
differentiation",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "145--155",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594304",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "If the result of an expensive computation is
invalidated by a small change to the input, the old
result should be updated incrementally instead of
reexecuting the whole computation. We incrementalize
programs through their derivative. A derivative maps
changes in the program's input directly to changes in
the program's output, without reexecuting the original
program. We present a program transformation taking
programs to their derivatives, which is fully static
and automatic, supports first-class functions, and
produces derivatives amenable to standard optimization.
We prove the program transformation correct in Agda for
a family of simply-typed $ \lambda $-calculi,
parameterized by base types and primitives. A precise
interface specifies what is required to incrementalize
the chosen primitives. We investigate performance by a
case study: we implement the program transformation in
Scala as a compiler plugin and improve the performance
of a nontrivial program by orders of magnitude.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Hammer:2014:ACD,
author = "Matthew A. Hammer and Khoo Yit Phang and Michael Hicks
and Jeffrey S. Foster",
title = "{Adapton}: composable, demand-driven incremental
computation",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "156--166",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594324",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many researchers have proposed programming languages
that support incremental computation (IC), which allows
programs to be efficiently re-executed after a small
change to the input. However, existing implementations
of such languages have two important drawbacks. First,
recomputation is oblivious to specific demands on the
program output; that is, if a program input changes,
all dependencies will be recomputed, even if an
observer no longer requires certain outputs. Second,
programs are made incremental as a unit, with little or
no support for reusing results outside of their
original context, e.g., when reordered. To address
these problems, we present $ \lambda_{ic}^{cdd} $, a
core calculus that applies a demand-driven semantics to
incremental computation, tracking changes in a
hierarchical fashion in a novel demanded computation
graph. $ \lambda_{ic}^{cdd} $ also formalizes an
explicit separation between inner, incremental
computations and outer observers. This combination
ensures $ \lambda_{ic}^{cdd} $ programs only recompute
computations as demanded by observers, and allows inner
computations to be reused more liberally. We present
Adapton, an OCaml library implementing $
\lambda_{ic}^{cdd} $. We evaluated Adapton on a range
of benchmarks, and found that it provides reliable
speedups, and in many cases dramatically outperforms
state-of-the-art IC approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
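A minimal sketch of the demand-driven discipline, in Python rather
than the paper's OCaml (class names invented): setting an input only
marks downstream thunks dirty, and recomputation happens when, and
only when, an observer demands a dirty thunk.

class Cell:
    """Mutable input; setting it dirties its readers lazily."""
    def __init__(self, value):
        self.value, self.readers = value, set()
    def get(self, reader=None):
        if reader is not None:
            self.readers.add(reader)
        return self.value
    def set(self, value):
        if value != self.value:
            self.value = value
            for t in self.readers:
                t.dirty()

class Thunk:
    """Memoized computation; recomputes only when demanded and dirty."""
    def __init__(self, fn):
        self.fn, self.cached = fn, None
        self.is_dirty, self.dependents = True, set()
    def dirty(self):
        if not self.is_dirty:
            self.is_dirty = True
            for t in self.dependents:   # propagate dirtying, not work
                t.dirty()
    def get(self, reader=None):
        if reader is not None:
            self.dependents.add(reader)
        if self.is_dirty:
            self.cached = self.fn(self)  # fn re-reads inputs through us
            self.is_dirty = False
        return self.cached

a, b = Cell(1), Cell(2)
total = Thunk(lambda me: a.get(me) + b.get(me))
print(total.get())   # 3: computed because an observer demanded it
a.set(10)            # marks `total` dirty; nothing recomputed yet
print(total.get())   # 12: recomputed on demand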
@Article{Aung:2014:SS,
author = "Min Aung and Susan Horwitz and Rich Joiner and Thomas
Reps",
title = "Specialization slicing",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "167--167",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594345",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we investigate opportunities to be
gained from broadening the definition of program
slicing. A major inspiration for our work comes from
the field of partial evaluation, in which a wide
repertoire of techniques have been developed for
specializing programs. While slicing can also be
harnessed for specializing programs, the kind of
specialization obtainable via slicing has heretofore
been quite restricted, compared to the kind of
specialization allowed in partial evaluation. In
particular, most slicing algorithms are what the
partial-evaluation community calls monovariant: each
program element of the original program generates at
most one element in the answer. In contrast,
partial-evaluation algorithms can be polyvariant, i.e.,
one program element in the original program may
correspond to more than one element in the specialized
program. The full paper appears in ACM TOPLAS 36 (2),
2014.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
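The monovariant/polyvariant contrast fits in a few lines of
illustrative Python (function names invented): a monovariant slicer
must keep a single body serving every caller, while a polyvariant
specializer may emit one variant per calling pattern.

def stats(xs):
    total = sum(xs)
    mean = total / len(xs)
    return total, mean

# Monovariant slicing keeps one `stats` serving all call sites, so a
# caller needing only `total` still drags in the `mean` code. A
# polyvariant specializer may instead emit one variant per pattern:

def stats__total(xs):   # for callers that demand only the total
    return sum(xs)

def stats__mean(xs):    # for callers that demand only the mean
    return sum(xs) / len(xs)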
@Article{Hoare:2014:LCP,
author = "Tony Hoare",
title = "Laws of concurrent programming",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "168--168",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2604002",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The talk extends the Laws of Programming [1] by four
laws governing concurrent composition of programs. This
operator is associative and commutative and
distributive through union; and it has the same unit
(do nothing) as sequential composition. Furthermore,
sequential and concurrent composition distribute
through each other, in accordance with an exchange law;
this permits an implementation of concurrency by
partial interleaving.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
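In the notation of the concurrent Kleene algebra literature (p ; q
for sequential and p \parallel q for concurrent composition, with
skip their shared unit), the laws sketched in the abstract read:

\begin{align*}
  (p \parallel q) \parallel r &= p \parallel (q \parallel r)
      && \text{associativity}\\
  p \parallel q &= q \parallel p
      && \text{commutativity}\\
  p \parallel (q \cup r) &= (p \parallel q) \cup (p \parallel r)
      && \text{distribution through union}\\
  p \parallel \mathit{skip} &= p = p \mathbin{;} \mathit{skip}
      && \text{shared unit}\\
  (p \parallel q) \mathbin{;} (r \parallel s) &\sqsubseteq
      (p \mathbin{;} r) \parallel (q \mathbin{;} s)
      && \text{exchange law}
\end{align*}

Reading \sqsubseteq as refinement, the exchange law says the staged
execution on the left is one admissible implementation of the
parallel composition on the right, which is what permits implementing
concurrency by partial interleaving.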
@Article{Sridharan:2014:AEP,
author = "Srinath Sridharan and Gagan Gupta and Gurindar S.
Sohi",
title = "Adaptive, efficient, parallel execution of parallel
programs",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "169--180",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594292",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Future multicore processors will be heterogeneous, be
increasingly less reliable, and operate in dynamically
changing operating conditions. Such environments will
result in a constantly varying pool of hardware
resources which can greatly complicate the task of
efficiently exposing a program's parallelism onto these
resources. Coupled with this uncertainty is the diverse
set of efficiency metrics that users may desire. This
paper proposes Varuna, a system that dynamically,
continuously, rapidly and transparently adapts a
program's parallelism to best match the instantaneous
capabilities of the hardware resources while satisfying
different efficiency metrics. Varuna is applicable to
both multithreaded and task-based programs and can be
seamlessly inserted between the program and the
operating system without needing to change the source
code of either. We demonstrate Varuna's effectiveness
in diverse execution environments using unaltered C/C++
parallel programs from various benchmark suites.
Regardless of the execution environment, Varuna always
outperformed the state-of-the-art approaches for the
efficiency metrics considered.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Gupta:2014:GPR,
author = "Gagan Gupta and Srinath Sridharan and Gurindar S.
Sohi",
title = "Globally precise-restartable execution of parallel
programs",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "181--192",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594306",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging trends in computer design and use are likely
to make exceptions, once rare, the norm, especially as
the system size grows. Due to exceptions, arising from
hardware faults, approximate computing, dynamic
resource management, etc., successful and error-free
execution of programs may no longer be assured. Yet,
designers will want to tolerate the exceptions so that
the programs execute completely, efficiently and
without external intervention. Modern computers easily
handle exceptions in sequential programs, using precise
interrupts. But they are ill-equipped to handle
exceptions in parallel programs, which are growing in
prevalence. In this work we introduce the notion of
globally precise-restartable execution of parallel
programs, analogous to precise-interruptible execution
of sequential programs. We present a software runtime
recovery system based on the approach to handle
exceptions in suitably-written parallel programs.
Qualitative and quantitative analyses show that the
proposed system scales with the system size, especially
when exceptions are frequent, unlike the conventional
checkpoint-and-recovery method.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Mitra:2014:AAP,
author = "Subrata Mitra and Ignacio Laguna and Dong H. Ahn and
Saurabh Bagchi and Martin Schulz and Todd Gamblin",
title = "Accurate application progress analysis for large-scale
parallel debugging",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "193--203",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594336",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Debugging large-scale parallel applications is
challenging. In most HPC applications, parallel tasks
progress in a coordinated fashion, and thus a fault in
one task can quickly propagate to other tasks, making
it difficult to debug. Finding the least-progressed
tasks can significantly reduce the effort to identify
the task where the fault originated. However, existing
approaches for detecting them suffer low accuracy and
large overheads; either they use imprecise static
analysis or are unable to infer progress dependence
inside loops. We present a loop-aware
progress-dependence analysis tool, Prodometer, which
determines relative progress among parallel tasks via
dynamic analysis. Our fault-injection experiments
suggest that its accuracy and precision are over 90\%
for most cases and that it scales well up to 16,384 MPI
tasks. Further, our case study shows that it
significantly helped diagnose a perplexing error in
MPI, which only manifested at large scale.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Tavarageri:2014:CAD,
author = "Sanket Tavarageri and Sriram Krishnamoorthy and P.
Sadayappan",
title = "Compiler-assisted detection of transient memory
errors",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "204--215",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594298",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The probability of bit flips in hardware memory
systems is projected to increase significantly as
memory systems continue to scale in size and
complexity. Effective hardware-based error detection
and correction require that the complete data path,
involving all parts of the memory system, be protected
with sufficient redundancy. First, this may be costly
to employ on commodity computing platforms, and second,
even on high-end systems, protection against multi-bit
errors may be lacking. Therefore, augmenting hardware
error detection schemes with software techniques is of
considerable interest. In this paper, we consider
software-level mechanisms to comprehensively detect
transient memory faults. We develop novel compile-time
algorithms to instrument application programs with
checksum computation codes to detect memory errors.
Unlike prior approaches that employ checksums on
computational and architectural states, our scheme
verifies every data access and works by tracking
variables as they are produced and consumed.
Experimental evaluation demonstrates that the proposed
comprehensive error detection solution is viable as a
completely software-only scheme. We also demonstrate
that with limited hardware support, overheads of error
detection can be further reduced.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
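A drastically simplified sketch of the checksum discipline in Python
(the paper generates such instrumentation for C at compile time and
tracks individual variables rather than a whole memory): stores
maintain a running XOR checksum over (address, value) pairs, and
loads re-verify it, so a silent bit flip is caught before the
corrupted value is consumed.

class CheckedMemory:
    def __init__(self):
        self.data, self.checksum = {}, 0
    def store(self, addr, value):
        if addr in self.data:                 # retire old contribution
            self.checksum ^= hash((addr, self.data[addr]))
        self.checksum ^= hash((addr, value))  # add new contribution
        self.data[addr] = value
    def load(self, addr):
        if not self.intact():
            raise RuntimeError("transient memory error detected")
        return self.data[addr]
    def intact(self):
        fresh = 0
        for addr, value in self.data.items():
            fresh ^= hash((addr, value))
        return fresh == self.checksum

mem = CheckedMemory()
mem.store(0x10, 7)
mem.store(0x18, 9)
assert mem.load(0x10) == 7
mem.data[0x18] ^= 1        # simulate a bit flip behind the checker
assert not mem.intact()    # caught before the value would be used

Verifying the whole memory on every load is of course far costlier
than the paper's scheme, which tracks values as they are produced and
consumed; the sketch only shows the detection principle.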
@Article{Le:2014:CVE,
author = "Vu Le and Mehrdad Afshari and Zhendong Su",
title = "Compiler validation via equivalence modulo inputs",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "216--226",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594334",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce equivalence modulo inputs (EMI), a
simple, widely applicable methodology for validating
optimizing compilers. Our key insight is to exploit the
close interplay between (1) dynamically executing a
program on some test inputs and (2) statically
compiling the program to work on all possible inputs.
Indeed, the test inputs induce a natural collection of
the original program's EMI variants, which can help
differentially test any compiler and specifically
target the difficult-to-find miscompilations. To create
a practical implementation of EMI for validating C
compilers, we profile a program's test executions and
stochastically prune its unexecuted code. Our extensive
testing in eleven months has led to 147 confirmed,
unique bug reports for GCC and LLVM alone. The majority
of those bugs are miscompilations, and more than 100
have already been fixed. Beyond testing compilers, EMI
can be adapted to validate program transformation and
analysis systems in general. This work opens up this
exciting, new direction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
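The EMI recipe itself is short. The sketch below applies it to Python
source rather than C, so it exercises the CPython compiler instead of
GCC or LLVM (the profiled function and file names are invented):
record which lines the test inputs execute, stochastically delete
statements they never reach, and require the pruned variant to agree
with the original on exactly those inputs.

import ast, random, sys

SRC = """
def classify(x):
    if x >= 0:
        sign = 1
    else:
        sign = -1
    return sign * x
"""

def run(src, inputs, trace=False):
    env, lines = {}, set()
    def tracer(frame, event, arg):
        if event == "line" and frame.f_code.co_filename == "<emi>":
            lines.add(frame.f_lineno)
        return tracer
    exec(compile(src, "<emi>", "exec"), env)
    if trace:
        sys.settrace(tracer)
    try:
        outs = [env["classify"](x) for x in inputs]
    finally:
        if trace:
            sys.settrace(None)
    return outs, lines

class Prune(ast.NodeTransformer):
    def __init__(self, live):
        self.live = live
    def visit(self, node):
        node = self.generic_visit(node)
        for field in ("body", "orelse"):
            stmts = getattr(node, field, None)
            if isinstance(stmts, list) and stmts:
                kept = [s for s in stmts
                        if isinstance(s, ast.FunctionDef)  # keep entry point
                        or s.lineno in self.live           # keep profiled code
                        or random.random() < 0.5]          # prune dead code
                setattr(node, field, kept or [ast.Pass()])
        return node

random.seed(0)
inputs = [0, 3, 42]        # these never drive the negative branch
base, live = run(SRC, inputs, trace=True)
tree = ast.fix_missing_locations(Prune(live).visit(ast.parse(SRC)))
assert run(ast.unparse(tree), inputs)[0] == base   # EMI variant must agree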
@Article{Long:2014:ARE,
author = "Fan Long and Stelios Sidiroglou-Douskos and Martin
Rinard",
title = "Automatic runtime error repair and containment via
recovery shepherding",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "227--238",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594337",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a system, RCV, for enabling software
applications to survive divide-by-zero and
null-dereference errors. RCV operates directly on
off-the-shelf, production, stripped x86 binary
executables. RCV implements recovery shepherding, which
attaches to the application process when an error
occurs, repairs the execution, tracks the repair
effects as the execution continues, contains the repair
effects within the application process, and detaches
from the process after all repair effects are flushed
from the process state. RCV therefore incurs negligible
overhead during the normal execution of the
application. We evaluate RCV on all divide-by-zero and
null-dereference errors available in the CVE database
[2] from January 2011 to March 2013 that (1) provide
publicly-available inputs that trigger the error which
(2) we were able to use to trigger the reported error
in our experimental environment. We collected a total
of 18 errors in seven real world applications,
Wireshark, the FreeType library, Claws Mail,
LibreOffice, GIMP, the PHP interpreter, and Chromium.
For 17 of the 18 errors, RCV enables the application to
continue to execute to provide acceptable output and
service to its users on the error-triggering inputs.
For 13 of the 18 errors, the continued RCV execution
eventually flushes all of the repair effects and RCV
detaches to restore the application to full clean
functionality. We perform a manual analysis of the
source code relevant to our benchmark errors, which
indicates that for 11 of the 18 errors the RCV and
later patched versions produce identical or equivalent
results on all inputs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Zhang:2014:ARP,
author = "Xin Zhang and Ravi Mangal and Radu Grigore and Mayur
Naik and Hongseok Yang",
title = "On abstraction refinement for program analyses in
{Datalog}",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "239--248",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594327",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A central task for a program analysis concerns how to
efficiently find a program abstraction that keeps only
information relevant for proving properties of
interest. We present a new approach for finding such
abstractions for program analyses written in Datalog.
Our approach is based on counterexample-guided
abstraction refinement: when a Datalog analysis run
fails using an abstraction, it seeks to generalize the
cause of the failure to other abstractions, and pick a
new abstraction that avoids a similar failure. Our
solution uses a boolean satisfiability formulation that
is general, complete, and optimal: it is independent of
the Datalog solver, it generalizes the failure of an
abstraction to as many other abstractions as possible,
and it identifies the cheapest refined abstraction to
try next. We show the performance of our approach on a
pointer analysis and a typestate analysis, on eight
real-world Java benchmark programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
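The refinement loop can be caricatured in a few lines of Python
(illustrative only: the analysis below is a stub, and failure
generalization uses simple monotonicity, where the paper generalizes
through a boolean-satisfiability encoding of the Datalog run): try
the cheapest untried abstraction, and when it fails, rule out every
abstraction it dominates.

from itertools import product

# abstraction = which of three pointers get context sensitivity
def analysis_proves(abstraction):
    # stand-in for the Datalog analysis: the property needs pointers
    # 0 and 2 to be treated precisely
    return abstraction[0] and abstraction[2]

def refine():
    candidates = sorted(product([False, True], repeat=3), key=sum)
    ruled_out = set()
    for abstraction in candidates:          # cheapest first
        if abstraction in ruled_out:
            continue
        if analysis_proves(abstraction):
            return abstraction
        # generalize the failure: anything no more precise than a
        # failed abstraction fails the same way
        for other in candidates:
            if all(o <= a for o, a in zip(other, abstraction)):
                ruled_out.add(other)
    return None

print(refine())   # (True, False, True): cheapest proving abstraction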
@Article{Zhang:2014:HTB,
author = "Xin Zhang and Ravi Mangal and Mayur Naik and Hongseok
Yang",
title = "Hybrid top-down and bottom-up interprocedural
analysis",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "249--258",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594328",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Interprocedural static analyses are broadly classified
into top-down and bottom-up, depending upon how they
compute, instantiate, and reuse procedure summaries.
Both kinds of analyses are challenging to scale:
top-down analyses are hindered by ineffective reuse of
summaries whereas bottom-up analyses are hindered by
inefficient computation and instantiation of summaries.
This paper presents a hybrid approach Swift that
combines top-down and bottom-up analyses in a manner
that gains their benefits without suffering their
drawbacks. Swift is general in that it is parametrized
by the top-down and bottom-up analyses it combines. We
show an instantiation of Swift on a type-state analysis
and evaluate it on a suite of 12 Java programs of size
60--250 KLOC each. Swift outperforms both conventional
approaches, finishing on all the programs while both of
those approaches fail on the larger programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Arzt:2014:FPC,
author = "Steven Arzt and Siegfried Rasthofer and Christian
Fritz and Eric Bodden and Alexandre Bartel and Jacques
Klein and Yves {Le Traon} and Damien Octeau and Patrick
McDaniel",
title = "{FlowDroid}: precise context, flow, field,
object-sensitive and lifecycle-aware taint analysis for
{Android} apps",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "259--269",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594299",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today's smartphones are a ubiquitous source of private
and confidential data. At the same time, smartphone
users are plagued by carelessly programmed apps that
leak important data by accident, and by malicious apps
that exploit their given privileges to copy such data
intentionally. While existing static taint-analysis
approaches have the potential of detecting such data
leaks ahead of time, all approaches for Android use a
number of coarse-grain approximations that can yield
high numbers of missed leaks and false alarms. In this
work we thus present FlowDroid, a novel and highly
precise static taint analysis for Android applications.
A precise model of Android's lifecycle allows the
analysis to properly handle callbacks invoked by the
Android framework, while context, flow, field and
object-sensitivity allows the analysis to reduce the
number of false alarms. Novel on-demand algorithms help
FlowDroid maintain high efficiency and precision at the
same time. We also propose DroidBench, an open test
suite for evaluating the effectiveness and accuracy of
taint-analysis tools specifically for Android apps. As
we show through a set of experiments using SecuriBench
Micro, DroidBench, and a set of well-known Android test
applications, FlowDroid finds a very high fraction of
data leaks while keeping the rate of false positives
low. On DroidBench, FlowDroid achieves 93\% recall and
86\% precision, greatly outperforming the commercial
tools IBM AppScan Source and Fortify SCA. FlowDroid
successfully finds leaks in a subset of 500 apps from
Google Play and about 1,000 malware apps from the
VirusShare project.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
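A deliberately tiny, flow-insensitive taint-propagation sketch in
Python (the Android-flavored API names are stand-ins; FlowDroid
itself is context-, flow-, field- and object-sensitive and models the
full component lifecycle): taint enters at source calls, flows
through assignments to a fixed point, and is reported when it reaches
a sink.

def taint_analysis(stmts, sources, sinks):
    """stmts: (lhs, rhs_vars, callee) triples in program order."""
    tainted, leaks, changed = set(), [], True
    while changed:                          # iterate to a fixed point
        changed = False
        for lhs, rhs, callee in stmts:
            if callee in sources and lhs and lhs not in tainted:
                tainted.add(lhs); changed = True
            if rhs & tainted and lhs and lhs not in tainted:
                tainted.add(lhs); changed = True
            if callee in sinks and rhs & tainted:
                leak = (callee, tuple(sorted(rhs & tainted)))
                if leak not in leaks:
                    leaks.append(leak); changed = True
    return leaks

prog = [
    ("id", set(), "getDeviceId"),              # source
    ("msg", {"id"}, None),                     # assignment propagates
    (None, {"msg"}, "sendTextMessage"),        # sink
]
print(taint_analysis(prog, {"getDeviceId"}, {"sendTextMessage"}))
# [('sendTextMessage', ('msg',))]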
@Article{Carbonneaux:2014:EEV,
author = "Quentin Carbonneaux and Jan Hoffmann and Tahina
Ramananandro and Zhong Shao",
title = "End-to-end verification of stack-space bounds for {C}
programs",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "270--281",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594301",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Verified compilers guarantee the preservation of
semantic properties and thus enable formal verification
of programs at the source level. However, important
quantitative properties such as memory and time usage
still have to be verified at the machine level where
interactive proofs tend to be more tedious and
automation is more challenging. This article describes
a framework that enables the formal verification of
stack-space bounds of compiled machine code at the C
level. It consists of a verified CompCert-based
compiler that preserves quantitative properties, a
verified quantitative program logic for interactive
stack-bound development, and a verified stack analyzer
that automatically derives stack bounds during
compilation. The framework is based on event traces
that record function calls and returns. The source
language is CompCert Clight and the target language is
x86 assembly. The compiler is implemented in the Coq
Proof Assistant and it is proved that crucial
properties of event traces are preserved during
compilation. A novel quantitative Hoare logic is
developed to verify stack-space bounds at the CompCert
Clight level. The quantitative logic is implemented in
Coq and proved sound with respect to event traces
generated by the small-step semantics of CompCert
Clight. Stack-space bounds can be proved at the source
level without taking into account low-level details
that depend on the implementation of the compiler. The
compiler fills in these low-level details during
compilation and generates a concrete stack-space bound
that applies to the produced machine code. The verified
stack analyzer is guaranteed to automatically derive
bounds for code with non-recursive functions. It
generates a derivation in the quantitative logic to
ensure soundness as well as interoperability with
interactively developed stack bounds. In an
experimental evaluation, the developed framework is
used to obtain verified stack-space bounds for micro
benchmarks as well as real system code. The examples
include the verified operating-system kernel CertiKOS,
parts of the MiBench embedded benchmark suite, and
programs from the CompCert benchmarks. The derived
bounds are close to the measured stack-space usage of
executions of the compiled programs on a Linux x86
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
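For non-recursive code, the bound such a stack analyzer derives is a
longest-path computation over the call graph weighted by frame
sizes. A toy Python rendition (frame sizes invented; the verified
analyzer works on compiled x86 code with machine-checked soundness):

import functools

frame = {"main": 32, "parse": 48, "eval": 64, "log": 16}  # bytes
calls = {"main": ["parse", "log"], "parse": ["eval"],
         "eval": [], "log": []}

@functools.lru_cache(maxsize=None)
def bound(f):
    # stack needed by f: its own frame plus the worst-case callee
    return frame[f] + max((bound(g) for g in calls[f]), default=0)

print(bound("main"))   # 144 = 32 + (48 + 64)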
@Article{Ball:2014:VTV,
author = "Thomas Ball and Nikolaj Bj{\o}rner and Aaron Gember
and Shachar Itzhaky and Aleksandr Karbyshev and Mooly
Sagiv and Michael Schapira and Asaf Valadarsky",
title = "{VeriCon}: towards verifying controller programs in
software-defined networks",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "282--293",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594317",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software-defined networking (SDN) is a new paradigm
for operating and managing computer networks. SDN
enables logically-centralized control over network
devices through a ``controller'' software that operates
independently from the network hardware, and can be
viewed as the network operating system. Network
operators can run both inhouse and third-party SDN
programs (often called applications) on top of the
controller, e.g., to specify routing and access control
policies. SDN opens up the possibility of applying
formal methods to prove the correctness of computer
networks. Indeed, recently much effort has been
invested in applying finite state model checking to
check that SDN programs behave correctly. However, in
general, scaling these methods to large networks is
challenging and, moreover, they cannot guarantee the
absence of errors. We present VeriCon, the first system
for verifying that an SDN program is correct on all
admissible topologies and for all possible (infinite)
sequences of network events. VeriCon either confirms
the correctness of the controller program on all
admissible network topologies or outputs a concrete
counterexample. VeriCon uses first-order logic to
specify admissible network topologies and desired
network-wide invariants, and then implements classical
Floyd-Hoare-Dijkstra deductive verification using Z3.
Our preliminary experience indicates that VeriCon is
able to rapidly verify correctness, or identify bugs,
for a large repertoire of simple core SDN programs.
VeriCon is compositional, in the sense that it verifies
the correctness of execution of any single network
event w.r.t. the specified invariant, and can thus
scale to handle large programs. To relieve the programmer
of the burden of specifying inductive invariants,
VeriCon includes a separate procedure for inferring
invariants, which is shown to be effective on simple
controller programs. We view VeriCon as a first step en
route to practical mechanisms for verifying
network-wide invariants of SDN programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Logozzo:2014:VMV,
author = "Francesco Logozzo and Shuvendu K. Lahiri and Manuel
F{\"a}hndrich and Sam Blackshear",
title = "Verification modulo versions: towards usable
verification",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "294--304",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594326",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce Verification Modulo Versions (VMV), a new
static analysis technique for reducing the number of
alarms reported by static verifiers while providing
sound semantic guarantees. First, VMV extracts semantic
environment conditions from a base program P.
Environmental conditions can either be sufficient
conditions (implying the safety of P) or necessary
conditions (implied by the safety of P). Then, VMV
instruments a new version of the program, P', with the
inferred conditions. We prove that we can use (i)
sufficient conditions to identify abstract regressions
of P' w.r.t. P; and (ii) necessary conditions to prove
the relative correctness of P' w.r.t. P. We show that
the extraction of environmental conditions can be
performed at a hierarchy of abstraction levels
(history, state, or call conditions) with each
subsequent level requiring a less sophisticated
matching of the syntactic changes between P' and P.
Call conditions are particularly useful because they
only require the syntactic matching of entry points and
callee names across program versions. We have
implemented VMV in a widely used static analysis and
verification tool. We report our experience on two
large code bases and demonstrate a substantial
reduction in alarms while additionally providing
relative correctness guarantees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Dimitrov:2014:CRD,
author = "Dimitar Dimitrov and Veselin Raychev and Martin Vechev
and Eric Koskinen",
title = "Commutativity race detection",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "305--315",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594322",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces the concept of a commutativity
race. A commutativity race occurs in a given execution
when two library method invocations can happen
concurrently yet they do not commute. Commutativity
races are an elegant concept enabling reasoning about
concurrent interaction at the library interface. We
present a dynamic commutativity race detector. Our
technique is based on a novel combination of vector
clocks and a structural representation automatically
obtained from a commutativity specification.
Conceptually, our work can be seen as generalizing
classical read-write race detection. We also present a
new logical fragment for specifying commutativity
conditions. This fragment is expressive, yet guarantees
a constant number of comparisons per method invocation
rather than the linear number required by unrestricted specifications. We
implemented our analyzer and evaluated it on real-world
applications. Experimental results indicate that our
analysis is practical: it discovered harmful
commutativity races with overhead comparable to
state-of-the-art, low-level race detectors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
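A naive dynamic detector illustrating the definition, in Python (the
toy map specification and the quadratic pair scan are gross
simplifications; the paper's detector combines vector clocks with a
structural representation of the specification to get constant work
per invocation): report any two causally unordered invocations that
fail the commutativity specification.

from itertools import combinations

def commutes(op1, op2):
    # toy spec for a map: two puts to the same key do not commute
    (m1, k1), (m2, k2) = op1, op2
    return not (m1 == m2 == "put" and k1 == k2)

# trace entries: (thread, vector clock, operation)
trace = [
    ("T1", {"T1": 1, "T2": 0}, ("put", "x")),
    ("T2", {"T1": 0, "T2": 1}, ("put", "x")),  # concurrent with first
    ("T1", {"T1": 2, "T2": 1}, ("get", "x")),  # ordered after both
]

def unordered(vc1, vc2):
    leq = lambda a, b: all(a[t] <= b[t] for t in a)
    return not leq(vc1, vc2) and not leq(vc2, vc1)

for (t1, vc1, op1), (t2, vc2, op2) in combinations(trace, 2):
    if t1 != t2 and unordered(vc1, vc2) and not commutes(op1, op2):
        print("commutativity race:", op1, "vs", op2)
# -> commutativity race: ('put', 'x') vs ('put', 'x')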
@Article{Maiya:2014:RDA,
author = "Pallavi Maiya and Aditya Kanade and Rupak Majumdar",
title = "Race detection for {Android} applications",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "316--325",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594311",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming environments for smartphones expose a
concurrency model that combines multi-threading and
asynchronous event-based dispatch. While this enables
the development of efficient and feature-rich
applications, unforeseen thread interleavings coupled
with non-deterministic reorderings of asynchronous
tasks can lead to subtle concurrency errors in the
applications. In this paper, we formalize the
concurrency semantics of the Android programming model.
We further define the happens-before relation for
Android applications, and develop a dynamic race
detection technique based on this relation. Our
relation generalizes the so far independently studied
happens-before relations for multi-threaded programs
and single-threaded event-driven programs.
Additionally, our race detection technique uses a model
of the Android runtime environment to reduce false
positives. We have implemented a tool called
DroidRacer. It generates execution traces by
systematically testing Android applications and detects
data races by computing the happens-before relation on
the traces. We analyzed 15 Android applications
including popular applications such as Facebook,
Twitter and K-9 Mail. Our results indicate that data
races are prevalent in Android applications, and that
DroidRacer is an effective tool to identify data
races.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Hsiao:2014:RDE,
author = "Chun-Hung Hsiao and Jie Yu and Satish Narayanasamy and
Ziyun Kong and Cristiano L. Pereira and Gilles A. Pokam
and Peter M. Chen and Jason Flinn",
title = "Race detection for event-driven mobile applications",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "326--336",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594330",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mobile systems commonly support an event-based model
of concurrent programming. This model, used in popular
platforms such as Android, naturally supports mobile
devices that have a rich array of sensors and user
input modalities. Unfortunately, most existing tools
for detecting concurrency errors of parallel programs
focus on a thread-based model of concurrency. If one
applies such tools directly to an event-based program,
they work poorly because they infer false dependencies
between unrelated events handled sequentially by the
same thread. In this paper we present a race detection
tool named CAFA for event-driven mobile systems. CAFA
uses the causality model that we have developed for the
Android event-driven system. A novel contribution of
our model is that it accounts for the causal order due
to the event queues, which are not accounted for in
past data race detectors. Detecting races based on
low-level races between memory accesses leads to a
large number of false positives. CAFA overcomes this
problem by checking for races between high-level
operations. We discuss our experience in using CAFA for
finding and understanding a number of known and unknown
harmful races in open-source Android applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Huang:2014:MSP,
author = "Jeff Huang and Patrick O'Neil Meredith and Grigore
Rosu",
title = "Maximal sound predictive race detection with control
flow abstraction",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "337--348",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594315",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite the numerous static and dynamic program
analysis techniques in the literature, data races
remain one of the most common bugs in modern concurrent
software. Further, the techniques that do exist either
have limited detection capability or are unsound,
meaning that they report false positives. We present a
sound race detection technique that achieves a provably
higher detection capability than existing sound
techniques. A key insight of our technique is the
inclusion of abstracted control flow information into
the execution model, which increases the space of the
causal model permitted by classical happens-before or
causally-precedes based detectors. By encoding the
control flow and a minimal set of feasibility
constraints as a group of first-order logic formulae,
we formulate race detection as a constraint solving
problem. Moreover, we formally prove that our
formulation achieves the maximal possible detection
capability for any sound dynamic race detector with
respect to the same input trace under the sequential
consistency memory model. We demonstrate via extensive
experimentation that our technique detects more races
than the other state-of-the-art sound race detection
techniques, and that it is scalable to executions of
real world concurrent applications with tens of
millions of critical events. These experiments also
revealed several previously unknown races in real
systems (e.g., Eclipse) that have been confirmed or
fixed by the developers. Our tool is also adopted by
Eclipse developers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{David:2014:TBC,
author = "Yaniv David and Eran Yahav",
title = "Tracelet-based code search in executables",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "349--360",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594343",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We address the problem of code search in executables.
Given a function in binary form and a large code base,
our goal is to statically find similar functions in the
code base. Towards this end, we present a novel
technique for computing similarity between functions.
Our notion of similarity is based on decomposition of
functions into tracelets: continuous, short, partial
traces of an execution. To establish tracelet
similarity in the face of low-level compiler
transformations, we employ a simple rewriting engine.
This engine uses constraint solving over alignment
constraints and data dependencies to match registers
and memory addresses between tracelets, bridging the
gap between tracelets that are otherwise similar. We
have implemented our approach and applied it to find
matches in over a million binary functions. We compare
tracelet matching to approaches based on n-grams and
graphlets and show that tracelet matching obtains
dramatically better precision and recall.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
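Stripped to its combinatorial core, tracelet matching looks like the
Python below (blocks are opaque labels here; the paper decomposes
real basic blocks into instruction sequences and rewrites registers
and memory offsets with a constraint solver before comparing):
extract every k-block partial path of each CFG and compare the sets.

def tracelets(cfg, k):
    """cfg: block -> successor blocks; yields paths of k blocks."""
    def walk(path):
        if len(path) == k:
            yield tuple(path)
            return
        for succ in cfg.get(path[-1], []):
            yield from walk(path + [succ])
    for block in cfg:
        yield from walk([block])

def similarity(cfg_a, cfg_b, k=2):
    a, b = set(tracelets(cfg_a, k)), set(tracelets(cfg_b, k))
    return len(a & b) / len(a | b) if a | b else 1.0   # Jaccard

f = {"entry": ["loop"], "loop": ["loop", "exit"], "exit": []}
g = {"entry": ["loop"], "loop": ["exit"], "exit": []}
print(similarity(f, g))   # 2 shared tracelets of 3 total -> 0.666...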
@Article{Pombrio:2014:RLE,
author = "Justin Pombrio and Shriram Krishnamurthi",
title = "Resugaring: lifting evaluation sequences through
syntactic sugar",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "361--371",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594319",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Syntactic sugar is pervasive in language technology.
It is used to shrink the size of a core language; to
define domain-specific languages; and even to let
programmers extend their language. Unfortunately,
syntactic sugar is eliminated by transformation, so the
resulting programs become unfamiliar to authors. Thus,
it comes at a price: it obscures the relationship
between the user's source program and the program being
evaluated. We address this problem by showing how to
compute reduction steps in terms of the surface syntax.
Each step in the surface language emulates one or more
steps in the core language. The computed steps hide the
transformation, thus maintaining the abstraction
provided by the surface language. We make these
statements about emulation and abstraction precise,
prove that they hold in our formalism, and verify part
of the system in Coq. We have implemented this work and
applied it to three very different languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{vonHanxleden:2014:SSC,
author = "Reinhard von Hanxleden and Bj{\"o}rn Duderstadt and
Christian Motika and Steven Smyth and Michael Mendler
and Joaqu{\'\i}n Aguado and Stephen Mercer and Owen
O'Brien",
title = "{SCCharts}: sequentially constructive statecharts for
safety-critical applications: {HW\slash SW}-synthesis
for a conservative extension of synchronous
statecharts",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "372--383",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594310",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new visual language, SCCharts, designed
for specifying safety-critical reactive systems.
SCCharts use a statechart notation and provide
determinate concurrency based on a synchronous model of
computation (MoC), without restrictions common to
previous synchronous MoCs. Specifically, we lift
earlier limitations on sequential accesses to shared
variables, by leveraging the sequentially constructive
MoC. The semantics and key features of SCCharts are
defined by a very small set of elements, the Core
SCCharts, consisting of state machines plus fork/join
concurrency. We also present a compilation chain that
allows efficient synthesis of software and hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{DAntoni:2014:FTB,
author = "Loris D'Antoni and Margus Veanes and Benjamin Livshits
and David Molnar",
title = "{Fast}: a transducer-based language for tree
manipulation",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "384--394",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594309",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Tree automata and tree transducers are used in a wide
range of applications in software engineering, from XML
processing to language type-checking. While these
formalisms are of immense practical use, they can only
model finite alphabets, and since many real-world
applications operate over infinite domains such as
integers, this is often a limitation. To overcome this
problem we augment tree automata and transducers with
symbolic alphabets represented as parametric theories.
Admitting infinite alphabets makes these models more
general and succinct than their classical counterparts.
Despite this, we show how the main operations, such as
composition and language equivalence, remain computable
given a decision procedure for the alphabet theory. We
introduce a high-level language called Fast that acts
as a front-end for the above formalisms. Fast supports
symbolic alphabets through tight integration with
state-of-the-art satisfiability modulo theory (SMT)
solvers. We demonstrate our techniques on practical
case studies, covering a wide range of applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Misra:2014:PPC,
author = "Jayadev Misra",
title = "A personal perspective on concurrency",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "395--395",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2604003",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This talk will describe a view of concurrency, the
author's own, as it has evolved since the late 1970s.
Early notions of concurrency were intimately tied with
physical hardware and speeding up of computations,
which proved to be an impediment to the development of
a logical theory of concurrency. In collaboration with
K. Mani Chandy, the author developed a theory called
UNITY that combined a programming notation with a
verification logic to describe a large class of
fundamental concurrent algorithms arising in operating
systems, communication protocols and distributed
systems. Several model checkers, including Murphi,
developed by David Dill, are based on UNITY. A
limitation of UNITY was a lack of adequate structuring
mechanism. While this was not a major problem in
low-level applications, the current widespread use of
concurrency requires theories that go beyond managing
infrastructure to the level of massive applications.
Our current research, a programming model called Orc,
introduces mechanisms to organize the communication,
synchronization and coordination in programs that run
on wide-area networks. Orc includes constructs to
orchestrate the concurrent invocation of services to
achieve a goal --- while managing time-outs,
priorities, and failure of sites or communication.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Phothilimthana:2014:CSA,
author = "Phitchaya Mangpo Phothilimthana and Tikhon Jelvis and
Rohin Shah and Nishant Totla and Sarah Chasins and
Rastislav Bodik",
title = "{Chlorophyll}: synthesis-aided compiler for low-power
spatial architectures",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "396--407",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594339",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We developed Chlorophyll, a synthesis-aided
programming model and compiler for the GreenArrays
GA144, an extremely minimalist low-power spatial
architecture that requires partitioning the program
into fragments of no more than 256 instructions and 64
words of data. This processor is 100-times more energy
efficient than its competitors, but currently can only
be programmed using a low-level stack-based language.
The Chlorophyll programming model allows programmers to
provide human insight by specifying partial
partitioning of data and computation. The Chlorophyll
compiler relies on synthesis, sidestepping the need to
develop classical optimizations, which may be
challenging given the unusual architecture. To scale
synthesis to real problems, we decompose the
compilation into smaller synthesis
subproblems---partitioning, layout, and code
generation. We show that the synthesized programs are
no more than 65\% slower than highly optimized
expert-written programs and are faster than programs
produced by a heuristic, non-synthesizing version of
our compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Perelman:2014:TDS,
author = "Daniel Perelman and Sumit Gulwani and Dan Grossman and
Peter Provost",
title = "Test-driven synthesis",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "408--418",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594297",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming-by-example technologies empower end-users
to create simple programs merely by providing
input/output examples. Existing systems are designed
around solvers specialized for a specific set of data
types or domain-specific language (DSL). We present a
program synthesizer which can be parameterized by an
arbitrary DSL that may contain conditionals and loops
and therefore is able to synthesize programs in any
domain. In order to use our synthesizer, the user
provides a sequence of increasingly sophisticated
input/output examples along with an expert-written DSL
definition. These two inputs correspond to the two key
ideas that allow our synthesizer to work in arbitrary
domains. First, we developed a novel iterative
synthesis technique inspired by test-driven
development---which also gives our technique the name
of test-driven synthesis ---where the input/output
examples are consumed one at a time as the program is
refined. Second, the DSL allows our system to take an
efficient component-based approach to enumerating
possible programs. We present applications of our
synthesis methodology to end-user programming for
transformations over strings, XML, and table layouts.
We compare our synthesizer on these applications to
state-of-the-art DSL-specific synthesizers as well to
the general purpose synthesizer Sketch.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
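The enumerate-and-check core is compact enough to sketch in Python
(the three-component DSL and the depth cap are invented; the paper's
DSLs include conditionals and loops, and its search is
component-based rather than brute force):

from itertools import count, product

COMPONENTS = {
    "add1":   lambda x: x + 1,
    "double": lambda x: x * 2,
    "square": lambda x: x * x,
}

def synthesize(examples, max_depth=4):
    """Shortest pipeline of components consistent with all examples."""
    for depth in count(1):
        if depth > max_depth:
            return None
        for pipeline in product(COMPONENTS, repeat=depth):
            def run(x, p=pipeline):
                for name in p:
                    x = COMPONENTS[name](x)
                return x
            if all(run(i) == o for i, o in examples):
                return pipeline

print(synthesize([(1, 4), (2, 6)]))     # ('add1', 'double')
print(synthesize([(2, 25), (3, 49)]))   # ('double', 'add1', 'square')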
@Article{Raychev:2014:CCS,
author = "Veselin Raychev and Martin Vechev and Eran Yahav",
title = "Code completion with statistical language models",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "419--428",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594321",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We address the problem of synthesizing code
completions for programs using APIs. Given a program
with holes, we synthesize completions for holes with
the most likely sequences of method calls. Our main
idea is to reduce the problem of code completion to a
natural-language processing problem of predicting
probabilities of sentences. We design a simple and
scalable static analysis that extracts sequences of
method calls from a large codebase, and indexes these
into a statistical language model. We then employ the
language model to find the highest ranked sentences,
and use them to synthesize a code completion. Our
approach is able to synthesize sequences of calls
across multiple objects together with their arguments.
Experiments show that our approach is fast and
effective. Virtually all computed completions
typecheck, and the desired completion appears in the
top 3 results in 90\% of the cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
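The reduction to language modeling is direct; below is a bigram-model
sketch in Python (training sequences invented; the paper mines call
sequences with a static analysis and uses richer, smoothed models):
train on API call sequences, then rank candidates for the hole by
conditional probability.

from collections import Counter, defaultdict

training = [
    ["File.open", "File.read", "File.close"],
    ["File.open", "File.write", "File.close"],
    ["File.open", "File.read", "File.read", "File.close"],
]

bigrams = defaultdict(Counter)
for seq in training:
    for prev, nxt in zip(seq, seq[1:]):
        bigrams[prev][nxt] += 1

def complete(prefix, k=2):
    """Rank candidate next calls for the hole following `prefix`."""
    candidates = bigrams[prefix[-1]]
    total = sum(candidates.values())
    return [(call, n / total) for call, n in candidates.most_common(k)]

print(complete(["File.open"]))
# [('File.read', 0.666...), ('File.write', 0.333...)]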
@Article{Greenaway:2014:DSS,
author = "David Greenaway and Japheth Lim and June Andronick and
Gerwin Klein",
title = "Don't sweat the small stuff: formal verification of
{C} code without the pain",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "429--439",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594296",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an approach for automatically generating
provably correct abstractions from C source code that
are useful for practical implementation verification.
The abstractions are easier for a human verification
engineer to reason about than the implementation and
increase the productivity of interactive code proof. We
guarantee soundness by automatically generating proofs
that the abstractions are correct. In particular, we
show two key abstractions that are critical for
verifying systems-level C code: automatically turning
potentially overflowing machine-word arithmetic into
ideal integers, and transforming low-level C pointer
reasoning into separate abstract heaps. Previous work
carrying out such transformations has either done so
using unverified translations, or required significant
proof engineering effort. We implement these
abstractions in an existing proof-producing
specification transformation framework named
AutoCorres, developed in Isabelle/HOL, and demonstrate
its effectiveness in a number of case studies. We show
scalability on multiple OS microkernels, and we show
how our changes to AutoCorres improve productivity for
total correctness by porting an existing high-level
verification of the Schorr--Waite algorithm to a
low-level C implementation with minimal effort.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
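The first abstraction is easy to illustrate outside the proof
assistant. A Python caricature (AutoCorres performs this inside
Isabelle/HOL and emits a proof relating the two levels): 32-bit
addition wraps, so reading it as ideal-integer addition is sound only
under a generated no-overflow side condition.

W = 2 ** 32

def add_u32(a, b):
    return (a + b) % W   # what the machine computes

def abstract_add(a, b):
    # the ideal-integer reading, valid only under the guard
    assert a + b < W, "no-overflow side condition violated"
    return a + b

assert add_u32(7, 5) == abstract_add(7, 5) == 12
assert add_u32(W - 1, 1) == 0   # wraps: the ideal reading would be wrong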
@Article{Pek:2014:NPD,
author = "Edgar Pek and Xiaokang Qiu and P. Madhusudan",
title = "Natural proofs for data structure manipulation in {C}
using separation logic",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "440--451",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594325",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The natural proof technique for heap verification
developed by Qiu et al. [32] provides a platform for
powerful sound reasoning for specifications written in
a dialect of separation logic called Dryad. Natural
proofs are proof tactics that enable automated
reasoning exploiting recursion, mimicking common
patterns found in human proofs. However, these proofs
are known to work only for a simple toy language [32].
In this work, we develop a framework called VCDryad
that extends the Vcc framework [9] to provide an
automated deductive framework against separation logic
specifications for C programs based on natural proofs.
We develop several new techniques to build this
framework, including (a) a novel tool architecture that
allows encoding natural proofs at a higher level in
order to use the existing Vcc framework (including its
intricate memory model, the underlying type-checker,
and the SMT-based verification infrastructure), and (b)
a synthesis of ghost-code annotations that captures
natural proof tactics, in essence forcing Vcc to find
natural proofs using primarily decidable theories. We
evaluate our tool extensively, on more than 150
programs, ranging from code manipulating standard data
structures, well-known open source library routines
(Glib, OpenBSD), Linux kernel routines, customized OS
data structures, etc. We show that all these C programs
can be fully automatically verified using natural
proofs (given pre/post conditions and loop invariants)
without any user-provided proof tactics. VCDryad is
perhaps the first deductive verification framework for
heap-manipulating programs in a real language that can
prove such a wide variety of programs automatically.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
received = "PLDI '14 conference proceedings.",
}
@Article{Ricketts:2014:AFP,
author = "Daniel Ricketts and Valentin Robert and Dongseok Jang
and Zachary Tatlock and Sorin Lerner",
title = "Automating formal proofs for reactive systems",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "452--462",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594338",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Implementing systems in proof assistants like Coq and
proving their correctness in full formal detail has
consistently demonstrated promise for making extremely
strong guarantees about critical software, ranging from
compilers and operating systems to databases and web
browsers. Unfortunately, these verifications demand
such heroic manual proof effort, even for a single
system, that the approach has not been widely adopted.
We demonstrate a technique to eliminate the manual
proof burden for verifying many properties within an
entire class of applications, in our case reactive
systems, while only expending effort comparable to the
manual verification of a single system. A crucial
insight of our approach is simultaneously designing
both (1) a domain-specific language (DSL) for
expressing reactive systems and their correctness
properties and (2) proof automation which exploits the
constrained language of both programs and properties to
enable fully automatic, pushbutton verification. We
apply this insight in a deeply embedded Coq DSL, dubbed
Reflex, and illustrate Reflex's expressiveness by
implementing and automatically verifying realistic
systems including a modern web browser, an SSH server,
and a web server. Using Reflex radically reduced the
proof burden: in previous, similar versions of our
benchmarks written in Coq by experts, proofs accounted
for over 80\% of the code base; our versions require no
manual proofs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
@Article{Xiao:2014:PPI,
author = "Xiao Xiao and Qirun Zhang and Jinguo Zhou and Charles
Zhang",
title = "Persistent pointer information",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "463--474",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594314",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Pointer information, indispensable for static analysis
tools, is expensive to compute and query. We provide a
query-efficient persistence technique, Pestrie, to
mitigate the costly computation and slow querying of
precise pointer information. Leveraging equivalence and
hub properties, Pestrie compresses pointer
information and answers pointer-related queries very
efficiently. Experiments show that Pestrie produces
10.5X and 17.5X smaller persistent files than the
traditional bitmap and BDD encodings. Meanwhile,
Pestrie is 2.9X to 123.6X faster than traditional
demand-driven approaches for serving points-to related
queries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
@Article{Oh:2014:SCS,
author = "Hakjoo Oh and Wonchan Lee and Kihong Heo and Hongseok
Yang and Kwangkeun Yi",
title = "Selective context-sensitivity guided by impact
pre-analysis",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "475--484",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594318",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a method for selectively applying
context-sensitivity during interprocedural program
analysis. Our method applies context-sensitivity only
when and where doing so is likely to improve the
precision that matters for resolving given queries. The
idea is to use a pre-analysis to estimate the impact of
context-sensitivity on the main analysis's precision,
and to use this information to find out when and where
the main analysis should turn on or off its
context-sensitivity. We formalize this approach and
prove that the analysis always benefits from the
pre-analysis-guided context-sensitivity. We implemented
this selective method for an existing
industrial-strength interval analyzer for full C. The
method reduced the number of (false) alarms by 24.4\%,
while increasing the analysis cost by 27.8\% on
average. The use of the selective method is not limited
to context-sensitivity. We demonstrate this generality
by following the same principle and developing a
selective relational analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
@Article{Smaragdakis:2014:IAC,
author = "Yannis Smaragdakis and George Kastrinis and George
Balatsouras",
title = "Introspective analysis: context-sensitivity, across
the board",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "485--495",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594320",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Context-sensitivity is the primary approach for adding
more precision to a points-to analysis, while hopefully
also maintaining scalability. An oft-reported problem
with context-sensitive analyses, however, is that they
are bi-modal: either the analysis is precise enough
that it manipulates only manageable sets of data, and
thus scales impressively well, or the analysis gets
quickly derailed at the first sign of imprecision and
becomes orders-of-magnitude more expensive than would
be expected given the program's size. There is
currently no approach that makes precise
context-sensitive analyses (of any flavor: call-site-,
object-, or type-sensitive) scale across the board at a
level comparable to that of a context-insensitive
analysis. To address this issue, we propose
introspective analysis: a technique for uniformly
scaling context-sensitive analysis by eliminating its
performance-detrimental behavior, at a small precision
expense. Introspective analysis consists of a common
adaptivity pattern: first perform a context-insensitive
analysis, then use the results to selectively refine
(i.e., analyze context-sensitively) program elements
that will not cause explosion in the running time or
space. The technical challenge is to appropriately
identify such program elements. We show that a simple
but principled approach can be remarkably effective,
achieving scalability (often with dramatic speedup) for
benchmarks previously completely out-of-reach for deep
context-sensitive analyses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
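The adaptivity pattern described in the abstract above, a cheap context-insensitive pass whose results decide what gets refined, is easy to sketch. The following Python fragment is a speculative illustration only; cheap_analysis, precise_analysis, and the size-based threshold are stand-in names and heuristics, not the authors' implementation.

# Hypothetical sketch of the introspective-analysis adaptivity pattern:
# run a cheap context-insensitive pass first, then refine only those
# program elements whose baseline result sets stayed small enough that
# context-sensitive reanalysis is unlikely to explode.

def introspective_analysis(program, cheap_analysis, precise_analysis,
                           threshold=1000):
    # Phase 1: context-insensitive baseline over the whole program.
    baseline = {elem: cheap_analysis(elem) for elem in program}
    # Phase 2: selectively refine, judged by a stand-in size heuristic.
    results = {}
    for elem, facts in baseline.items():
        if len(facts) <= threshold:
            results[elem] = precise_analysis(elem)  # context-sensitive
        else:
            results[elem] = facts                   # keep cheap result
    return results

pts = introspective_analysis(
    ["main", "parse", "hot_loop"],
    cheap_analysis=lambda m: set(range(5000)) if m == "hot_loop" else {"o1"},
    precise_analysis=lambda m: {"o1"},
)
print(len(pts["hot_loop"]))   # 5000: too big, deliberately kept imprecise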
@Article{Ahn:2014:IJP,
author = "Wonsun Ahn and Jiho Choi and Thomas Shull and
Mar{\'\i}a J. Garzar{\'a}n and Josep Torrellas",
title = "Improving {JavaScript} performance by deconstructing
the type system",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "496--507",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594332",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Increased focus on JavaScript performance has resulted
in vast performance improvements for many benchmarks.
However, for actual code used in websites, the attained
improvements often lag far behind those for popular
benchmarks. This paper shows that the main reason
behind this shortfall is how the compiler understands
types. JavaScript has no concept of types, but the
compiler assigns types to objects anyway for ease of
code generation. We examine the way that the Chrome V8
compiler defines types, and identify two design
decisions that are the main reasons for the lack of
improvement: (1) the inherited prototype object is part
of the current object's type definition, and (2) method
bindings are also part of the type definition. These
requirements make types very unpredictable, which
hinders type specialization by the compiler. Hence, we
modify V8 to remove these requirements, and use it to
compile the JavaScript code assembled by JSBench from
real websites. On average, we reduce the execution time
of JSBench by 36\%, and the dynamic instruction count
by 49\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
@Article{Vilk:2014:DBB,
author = "John Vilk and Emery D. Berger",
title = "{Doppio}: breaking the browser language barrier",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "508--518",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594293",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Web browsers have become a de facto universal
operating system, and JavaScript its instruction set.
Unfortunately, running other languages in the browser
is not generally possible. Translation to JavaScript is
not enough because browsers are a hostile environment
for other languages. Previous approaches are either
non-portable or require extensive modifications for
programs to work in a browser. This paper presents
Doppio, a JavaScript-based runtime system that makes it
possible to run unaltered applications written in
general-purpose languages directly inside the browser.
Doppio provides a wide range of runtime services,
including a file system that enables local and external
(cloud-based) storage, an unmanaged heap, sockets,
blocking I/O, and multiple threads. We demonstrate
Doppio's usefulness with two case studies: we extend
Emscripten with Doppio, letting it run an unmodified
C++ application in the browser with full functionality,
and present DoppioJVM, an interpreter that runs
unmodified JVM programs directly in the browser. While
substantially slower than a native JVM (between 24X and
42X slower on CPU-intensive benchmarks in Google
Chrome), DoppioJVM makes it feasible to directly reuse
existing, non-compute-intensive code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
@Article{Lu:2014:DED,
author = "Li Lu and Weixing Ji and Michael L. Scott",
title = "Dynamic enforcement of determinism in a parallel
scripting language",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "519--529",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594300",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Determinism is an appealing property for parallel
programs, as it simplifies understanding, reasoning and
debugging. It is particularly appealing in dynamic
(scripting) languages, where ease of programming is a
dominant design goal. Some existing parallel languages
use the type system to enforce determinism statically,
but this is not generally practical for dynamic
languages. In this paper, we describe how determinism
can be obtained---and dynamically
enforced/verified---for appropriate extensions to a
parallel scripting language. Specifically, we introduce
the constructs of Deterministic Parallel Ruby (DPR),
together with a run-time system (Tardis) that verifies
properties required for determinism, including correct
usage of reductions and commutative operators, and the
mutual independence (data-race freedom) of concurrent
tasks. Experimental results confirm that DPR can
provide scalable performance on multicore machines and
that the overhead of Tardis is low enough for practical
testing. In particular, Tardis significantly
outperforms alternative data-race detectors with
comparable functionality. We conclude with a discussion
of future directions in the dynamic enforcement of
determinism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
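The mutual-independence property that Tardis verifies at runtime, data-race freedom of concurrent tasks, amounts to disjointness of read/write sets. A minimal Python sketch of that check over hypothetical task descriptors (DPR's instrumentation gathers these sets automatically; this is only the shape of the check):

# Minimal sketch (not the Tardis implementation): concurrent tasks are
# independent iff no task's write set overlaps another task's read or
# write set, so any execution order yields the same result.

def independent(tasks):
    # tasks: list of (reads, writes) pairs, each a set of locations.
    for i, (r1, w1) in enumerate(tasks):
        for r2, w2 in tasks[i + 1:]:
            if w1 & (r2 | w2) or w2 & r1:
                return False          # conflict: order could matter
    return True

# Two tasks touching disjoint array slices are independent.
t1 = (set(), {"a[0]", "a[1]"})
t2 = ({"n"}, {"a[2]", "a[3]"})
print(independent([t1, t2]))          # True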
@Article{Torlak:2014:LSV,
author = "Emina Torlak and Rastislav Bodik",
title = "A lightweight symbolic virtual machine for
solver-aided host languages",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "530--541",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594340",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Solver-aided domain-specific languages (SDSLs) are an
emerging class of computer-aided programming systems.
They ease the construction of programs by using
satisfiability solvers to automate tasks such as
verification, debugging, synthesis, and
non-deterministic execution. But reducing programming
tasks to satisfiability problems involves translating
programs to logical constraints, which is an
engineering challenge even for domain-specific
languages. We have previously shown that translation to
constraints can be avoided if SDSLs are implemented by
(traditional) embedding into a host language that is
itself solver-aided. This paper describes how to
implement a symbolic virtual machine (SVM) for such a
host language. Our symbolic virtual machine is
lightweight because it compiles to constraints only a
small subset of the host's constructs, while allowing
SDSL designers to use the entire language, including
constructs for DSL embedding. This lightweight
compilation employs a novel symbolic execution
technique with two key properties: it produces compact
encodings, and it enables concrete evaluation to strip
away host constructs that are outside the subset
compilable to constraints. Our symbolic virtual machine
architecture is at the heart of Rosette, a solver-aided
language that is host to several new SDSLs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
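The lightweight-compilation idea, that concrete evaluation strips away host constructs and only operations over symbolic values build an encoding, can be caricatured in a few lines. The Sym class below is an illustrative stand-in, not Rosette's representation:

# Toy illustration: evaluation stays concrete until a symbolic value is
# involved, at which point a constraint term is built instead.

class Sym:
    def __init__(self, expr):
        self.expr = expr              # encoding term, e.g. ('+', 'x', 6)
    def __add__(self, other):
        rhs = other.expr if isinstance(other, Sym) else other
        return Sym(('+', self.expr, rhs))
    __radd__ = __add__
    def __repr__(self):
        return f"Sym{self.expr}"

def host_helper(n):
    # An arbitrary host construct: it runs concretely and never
    # appears in the encoding; it is "stripped away".
    return sum(range(n))

x = Sym('x')                          # symbolic input
print(host_helper(4) + x)             # Sym('+', 'x', 6)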
@Article{Le:2014:FFD,
author = "Vu Le and Sumit Gulwani",
title = "{FlashExtract}: a framework for data extraction by
examples",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "542--553",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594333",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Various document types that combine model and view
(e.g., text files, webpages, spreadsheets) make it easy
to organize (possibly hierarchical) data, but make it
difficult to extract raw data for any further
manipulation or querying. We present a general
framework FlashExtract to extract relevant data from
semi-structured documents using examples. It includes:
(a) an interaction model that allows end-users to give
examples to extract various fields and to relate them
in a hierarchical organization using structure and
sequence constructs; and (b) an inductive synthesis
algorithm that synthesizes the intended program from a
few examples in any underlying domain-specific language
for data extraction built using our specified algebra
of a few core operators (map, filter, merge, and pair).
We describe the instantiation of our framework in three
different domains: text files, webpages, and
spreadsheets. On our benchmark comprising 75 documents,
FlashExtract is able to extract intended data using an
average of 2.36 examples in 0.84 seconds per field.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
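As a rough illustration of the algebra of core operators named above (map, filter, merge, and pair), here is one plausible Python reading of their sequence-level semantics; the actual FlashExtract DSLs are richer and per-domain:

# Plausible toy semantics for the four core operators; illustrative
# only, not the paper's operator definitions.

def map_op(f, xs):
    return [f(x) for x in xs]

def filter_op(p, xs):
    return [x for x in xs if p(x)]

def merge_op(*seqs):
    return sorted({x for s in seqs for x in s})   # deduplicated union

def pair_op(xs, ys):
    return list(zip(xs, ys))          # combine two field sequences

lines = ["alice 3", "bob 5"]
names = map_op(lambda l: l.split()[0], lines)
scores = map_op(lambda l: int(l.split()[1]), lines)
print(pair_op(names, scores))         # [('alice', 3), ('bob', 5)]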
@Article{Sousa:2014:CQU,
author = "Marcelo Sousa and Isil Dillig and Dimitrios Vytiniotis
and Thomas Dillig and Christos Gkantsidis",
title = "Consolidation of queries with user-defined functions",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "554--564",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594305",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Motivated by streaming and data analytics scenarios
where many queries operate on the same data and perform
similar computations, we propose program consolidation
for merging multiple user-defined functions (UDFs) that
operate on the same input. Program consolidation
exploits common computations between UDFs to generate
an equivalent optimized function whose execution cost
is often much smaller (and never greater) than the sum
of the costs of executing each function individually.
We present a sound consolidation calculus and an
effective algorithm for consolidating multiple UDFs.
Our approach is purely static and uses symbolic
SMT-based techniques to identify shared or redundant
computations. We have implemented the proposed
technique on top of the Naiad data processing system.
Our experiments show that our algorithm dramatically
improves overall job completion time when executing
user-defined filters that operate on the same data and
perform similar computations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
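The effect of consolidation can be shown by hand on two toy UDFs that share a subcomputation. The paper's calculus discovers such sharing automatically via SMT-based techniques; the merge below is written out manually for illustration:

# Two user-defined filters over the same input share an expensive
# feature computation; the consolidated function evaluates it once.

def expensive_feature(record):
    return sum(ord(c) for c in record)     # stand-in for shared work

def udf_a(record):
    return expensive_feature(record) > 500

def udf_b(record):
    return expensive_feature(record) % 2 == 0

def consolidated(record):
    f = expensive_feature(record)          # computed once, used twice
    return f > 500, f % 2 == 0

data = ["alpha", "beta"]
assert [(udf_a(r), udf_b(r)) for r in data] == \
       [consolidated(r) for r in data]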
@Article{Luu:2014:MCC,
author = "Loi Luu and Shweta Shinde and Prateek Saxena and Brian
Demsky",
title = "A model counter for constraints over unbounded
strings",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "565--576",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594331",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Model counting is the problem of determining the
number of solutions that satisfy a given set of
constraints. Model counting has numerous applications
in the quantitative analyses of program execution time,
information flow, combinatorial circuit designs as well
as probabilistic reasoning. We present a new approach
to model counting for structured data types,
specifically strings in this work. The key ingredient
is a new technique that leverages generating functions
as a basic primitive for combinatorial counting. Our
tool SMC, which embodies this approach, can model count
for constraints specified in an expressive string
language efficiently and precisely, thereby
outperforming previous finite-size analysis tools. SMC
is expressive enough to model constraints arising in
real-world JavaScript applications and UNIX C
utilities. We demonstrate the practical feasibility of
performing quantitative analyses arising in security
applications, such as determining the comparative
strengths of password strength meters and determining
the information leakage via side channels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
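The generating-function primitive at the heart of SMC can be illustrated with the classic fact that counting an (unambiguous) concatenation of two languages amounts to multiplying their generating functions, i.e., convolving per-length string counts. A small sketch, far simpler than SMC's constraint language:

# If f[n] counts the strings of length n in L1 and g[n] those in L2,
# the unambiguous concatenation L1 . L2 is counted by the convolution
# of f and g (the product of the generating functions).

def convolve(f, g):
    h = [0] * (len(f) + len(g) - 1)
    for i, a in enumerate(f):
        for j, b in enumerate(g):
            h[i + j] += a * b
    return h

k = 26                                 # alphabet size
any_up_to_2 = [1, k, k * k]            # all strings of length 0, 1, 2
fixed_word = [0, 0, 0, 1]              # one fixed string of length 3

# Counts, by length, of (any string of length <= 2) + (the fixed word):
print(convolve(any_up_to_2, fixed_word))   # [0, 0, 0, 1, 26, 676]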
@Article{Niu:2014:MCF,
author = "Ben Niu and Gang Tan",
title = "Modular control-flow integrity",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "577--587",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594295",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Control-Flow Integrity (CFI) is a software-hardening
technique. It inlines checks into a program so that its
execution always follows a predetermined Control-Flow
Graph (CFG). As a result, CFI is effective at
preventing control-flow hijacking attacks. However,
past fine-grained CFI implementations do not support
separate compilation, which hinders its adoption. We
present Modular Control-Flow Integrity (MCFI), a new
CFI technique that supports separate compilation. MCFI
allows modules to be independently instrumented and
linked statically or dynamically. The combined module
enforces a CFG that is a combination of the individual
modules' CFGs. One challenge in supporting dynamic
linking in multithreaded code is how to ensure a safe
transition from the old CFG to the new CFG when
libraries are dynamically linked. The key technique we
use is to have the CFG represented in a runtime data
structure and have reads and updates of the data
structure wrapped in transactions to ensure thread
safety. Our evaluation on SPEC CPU2006 benchmarks shows
that MCFI supports separate compilation, incurs low
overhead of around 5\%, and enhances security.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
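The runtime CFG representation with transactional reads and updates can be sketched with a sequence-lock-style version counter: a check retries if it overlapped an update such as dynamic linking. This Python shape is only illustrative; MCFI's real mechanism instruments machine code:

# Sketch: indirect-branch checks read an allowed-target table; updates
# bump a version counter so racing readers retry on a torn snapshot.

import threading

class CfgTable:
    def __init__(self):
        self.version = 0               # even: stable, odd: mid-update
        self.edges = {}                # branch site -> allowed targets
        self._lock = threading.Lock()

    def update(self, new_edges):       # e.g., on dynamic linking
        with self._lock:
            self.version += 1          # mark table as being updated
            self.edges.update(new_edges)
            self.version += 1          # publish the combined CFG

    def check(self, site, target):
        while True:
            v = self.version
            ok = target in self.edges.get(site, ())
            if v % 2 == 0 and v == self.version:
                return ok              # consistent snapshot observed

tbl = CfgTable()
tbl.update({"call_site_1": {"f", "g"}})
print(tbl.check("call_site_1", "f"))   # True
print(tbl.check("call_site_1", "h"))   # False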
@Article{Yang:2014:DSL,
author = "Edward Z. Yang and David Mazi{\`e}res",
title = "Dynamic space limits for {Haskell}",
journal = j-SIGPLAN,
volume = "49",
number = "6",
pages = "588--598",
month = jun,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666356.2594341",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:38:28 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe the semantics and implementation of a
space limits system for Haskell, which allows
programmers to create resource containers that enforce
bounded resident memory usage at runtime. Our system is
distinguished by a clear allocator-pays semantics drawn
from previous experience with profiling in Haskell and
an implementation strategy which uses a
block-structured heap to organize containers, allowing
us to enforce limits with high accuracy. To deal with
the problem of deallocating data in a garbage collected
heap, we propose a novel taint-based mechanism that
unifies the existing practices of revocable pointers
and killing threads in order to reclaim memory. Our
system is implemented in GHC, a production-strength
compiler for Haskell.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '14 conference proceedings.",
}
@Article{Tsafrir:2014:ELV,
author = "Dan Tsafrir",
title = "Experiences in the land of virtual abstractions",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "1--2",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576215",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "The Microsoft Research Drawbridge Project began with a
simple question: Is it possible to achieve the benefits
of hardware virtual machines without the overheads?
Following that question, we have built a line of
exploratory prototypes. These prototypes range from an
ARM-based phone that runs x86 Windows binaries to new
forms of secure computation. In this talk, I'll briefly
describe our various prototypes and the evidence we
have accumulated that our first question can be
answered in the affirmative.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Hizver:2014:RTD,
author = "Jennia Hizver and Tzi-cker Chiueh",
title = "Real-time deep virtual machine introspection and its
applications",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "3--14",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576196",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Virtual Machine Introspection (VMI) provides the
ability to monitor virtual machines (VM) in an
agentless fashion by gathering VM execution states from
the hypervisor and analyzing those states to extract
information about a running operating system (OS)
without installing an agent inside the VM. VMI's main
challenge lies in the difficulty of converting
low-level byte string values into high-level semantic
states of the monitored VM's OS. In this work, we
tackle this challenge by developing a real-time kernel
data structure monitoring (RTKDSM) system that
leverages the rich OS analysis capabilities of
Volatility, an open source computer forensics
framework, to significantly simplify and automate
analysis of VM execution states. The RTKDSM system is
designed as an extensible software framework that is
meant to be extended to perform application-specific VM
state analysis. In addition, the RTKDSM system is able
to perform real-time monitoring of any changes made to
the extracted OS states of guest VMs. This real-time
monitoring capability is especially important for
VMI-based security applications. To minimize the
performance overhead associated with real-time kernel
data structure monitoring, the RTKDSM system has
incorporated several optimizations whose effectiveness
is reported in this paper.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Arya:2014:TRG,
author = "Kapil Arya and Yury Baskakov and Alex Garthwaite",
title = "Tesseract: reconciling guest {I/O} and hypervisor
swapping in a {VM}",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "15--28",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576198",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Double-paging is an often-cited, if unsubstantiated,
problem in multi-level scheduling of memory between
virtual machines (VMs) and the hypervisor. This problem
occurs when both a virtualized guest and the hypervisor
overcommit their respective physical address-spaces.
When the guest pages out memory previously swapped out
by the hypervisor, it initiates an expensive sequence
of steps causing the contents to be read in from the
hypervisor swapfile only to be written out again,
significantly lengthening the time to complete the
guest I/O request. As a result, performance rapidly
drops. We present Tesseract, a system that directly and
transparently addresses the double-paging problem.
Tesseract tracks when guest and hypervisor I/O
operations are redundant and modifies these I/Os to
create indirections to existing disk blocks containing
the page contents. Although our focus is on reconciling
I/Os between the guest disks and hypervisor swap, our
technique is general and can reconcile, or deduplicate,
I/Os for guest pages read or written by the VM.
Deduplication of disk blocks for file contents accessed
in a common manner is well-understood. One challenge
that our approach faces is that the locality of guest
I/Os (reflecting the guest's notion of disk layout)
often differs from that of the blocks in the hypervisor
swap. This loss of locality through indirection results
in significant performance loss on subsequent guest
reads. We propose two alternatives to recovering this
lost locality, each based on the idea of asynchronously
reorganizing the indirected blocks in persistent
storage. We evaluate our system and show that it can
significantly reduce the costs of double-paging. We
focus our experiments on a synthetic benchmark designed
to highlight its effects. In our experiments we observe
Tesseract can improve our benchmark's throughput by as
much as 200\% when using traditional disks and by as
much as 30\% when using SSDs. At the same time,
worst-case application responsiveness can be improved by a
factor of 5.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Kim:2014:VAM,
author = "Hwanju Kim and Sangwook Kim and Jinkyu Jeong and
Joonwon Lee",
title = "Virtual asymmetric multiprocessor for interactive
performance of consolidated desktops",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "29--40",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576199",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "This paper presents virtual asymmetric multiprocessor,
a new scheme of virtual desktop scheduling on
multi-core processors for user-interactive performance.
The proposed scheme enables virtual CPUs to be
dynamically performance-asymmetric based on their
hosted workloads. To enhance user experience on
consolidated desktops, our scheme provides interactive
workloads with fast virtual CPUs, which have more
computing power than those hosting background workloads
in the same virtual machine. To this end, we devise a
hypervisor extension that transparently distinguishes
background tasks from potentially interactive
workloads. In addition, we introduce a guest extension
that manipulates the scheduling policy of an operating
system in favor of our hypervisor-level scheme so that
interactive performance can be further improved. Our
evaluation shows that the proposed scheme significantly
improves interactive performance of application launch,
Web browsing, and video playback applications when
CPU-intensive workloads highly disturb the interactive
workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Ben-Yehuda:2014:GMD,
author = "Orna Agmon Ben-Yehuda and Eyal Posener and Muli
Ben-Yehuda and Assaf Schuster and Ahuva Mu'alem",
title = "{Ginseng}: market-driven memory allocation",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "41--52",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576197",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Physical memory is the scarcest resource in today's
cloud computing platforms. Cloud providers would like
to maximize their clients' satisfaction by renting
precious physical memory to those clients who value it
the most. But real-world cloud clients are selfish:
they will only tell their providers the truth about how
much they value memory when it is in their own best
interest to do so. How can real-world cloud providers
allocate memory efficiently to those (selfish) clients
who value it the most? We present Ginseng, the first
market-driven cloud system that allocates memory
efficiently to selfish cloud clients. Ginseng
incentivizes selfish clients to bid their true value
for the memory they need when they need it. Ginseng
continuously collects client bids, finds an efficient
memory allocation, and re-allocates physical memory to
the clients that value it the most. Ginseng achieves a
$6.2\times$--$15.8\times$ improvement (83\%--100\% of the
optimum) in aggregate client satisfaction when compared
with state-of-the-art approaches for cloud memory
allocation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
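A toy greedy allocation conveys the flavor of renting scarce memory to the clients who value it most. Ginseng's actual auction also has to make truthful bidding the clients' best strategy, which this sketch ignores:

# Toy memory auction: grant pages to the highest per-page bidders
# first. Illustration only; no incentive-compatibility here.

def allocate(pool_pages, bids):
    # bids: {client: (pages_wanted, price_per_page)}
    allocation = {}
    for client in sorted(bids, key=lambda c: bids[c][1], reverse=True):
        wanted, _price = bids[client]
        grant = min(wanted, pool_pages)
        allocation[client] = grant
        pool_pages -= grant
    return allocation

bids = {"vm1": (512, 0.05), "vm2": (256, 0.20), "vm3": (512, 0.10)}
print(allocate(768, bids))    # {'vm2': 256, 'vm3': 512, 'vm1': 0}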
@Article{Hwang:2014:MFG,
author = "Jinho Hwang and Ahsen Uppal and Timothy Wood and Howie
Huang",
title = "{Mortar}: filling the gaps in data center memory",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "53--64",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576203",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Data center servers are typically overprovisioned,
leaving spare memory and CPU capacity idle to handle
unpredictable workload bursts by the virtual machines
running on them. While this allows for fast hotspot
mitigation, it is also wasteful. Unfortunately, making
use of spare capacity without impacting active
applications is particularly difficult for memory since
it typically must be allocated in coarse chunks over
long timescales. In this work we propose repurposing
the poorly utilized memory in a data center to host a
volatile data store that is managed by the hypervisor.
We present two uses for our Mortar framework: as a
cache for prefetching disk blocks, and as an
application-level distributed cache that follows the
memcached protocol. Both prototypes use the framework
to ask the hypervisor to store useful, but recoverable
data within its free memory pool. This allows the
hypervisor to control eviction policies and prioritize
access to the cache. We demonstrate the benefits of our
prototypes using realistic web applications and disk
benchmarks, as well as memory traces gathered from live
servers in our university's IT department. By expanding
and contracting the data store size based on the free
memory available, Mortar improves average response time
of a web application by up to 35\% compared to a fixed
size memcached deployment, and improves overall video
streaming performance by 45\% through prefetching.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Chen:2014:CCB,
author = "Licheng Chen and Zhipeng Wei and Zehan Cui and Mingyu
Chen and Haiyang Pan and Yungang Bao",
title = "{CMD}: classification-based memory deduplication
through page access characteristics",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "65--76",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576204",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Limited main memory size is considered as one of the
major bottlenecks in virtualization environments.
Content-Based Page Sharing (CBPS) is an efficient
memory deduplication technique to reduce server memory
requirements, in which pages with same content are
detected and shared into a single copy. As the widely
used implementation of CBPS, Kernel Samepage Merging
(KSM) organizes all memory pages into two global
comparison trees (a stable tree and an unstable tree).
To detect page sharing opportunities, each tracked page
needs to be compared with pages already in these two
large global trees. However, since the vast majority of
compared pages have different content, this induces
massive futile comparisons and thus heavy
overhead. In this paper, we propose a lightweight page
Classification-based Memory Deduplication approach
named CMD that reduces futile page comparison overhead
while still detecting page sharing opportunities
efficiently. The main innovation of CMD is that pages
are grouped into different classifications based on
page access characteristics. Pages with similar access
characteristics are more likely to have the
same content, so they are grouped
into the same classification. In CMD, the large global
comparison trees are divided into multiple small trees
with dedicated local ones in each page classification.
Page comparisons are performed just in the same
classification, and pages from different
classifications are never compared (since such
comparisons would probably be futile). The experimental
results show that CMD efficiently reduces page
comparisons (by about 68.5\%) while detecting nearly
the same (more than 98\%) or even more page sharing
opportunities.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
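The core CMD idea, bucketing pages by an access-characteristic key and comparing only within a bucket, fits in a few lines. The write-frequency key below is a made-up stand-in for the paper's page access characteristics:

# Sketch of classification-based deduplication: pages are only compared
# (here, via exact content lookup) inside their own classification.
# Note the deliberate trade-off: the third page below has identical
# content but a different access class, so it is never compared.

from collections import defaultdict

def dedup(pages):
    # pages: list of (content_bytes, writes_per_sec)
    classes = defaultdict(dict)        # class key -> {content: page id}
    shared = 0
    for i, (content, wps) in enumerate(pages):
        key = min(int(wps).bit_length(), 8)   # coarse frequency bucket
        bucket = classes[key]
        if content in bucket:
            shared += 1                # merge with the canonical copy
        else:
            bucket[content] = i
    return shared

pages = [(b"A" * 4096, 1), (b"A" * 4096, 1), (b"A" * 4096, 900)]
print(dedup(pages))                    # 1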
@Article{Robatmili:2014:MRL,
author = "Behnam Robatmili and Calin Cascaval and Mehrdad
Reshadi and Madhukar N. Kedlaya and Seth Fowler and
Vrajesh Bhavsar and Michael Weber and Ben Hardekopf",
title = "{MuscalietJS}: rethinking layered dynamic web
runtimes",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "77--88",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576211",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Layered JavaScript engines, in which the JavaScript
runtime is built on top of another managed runtime,
provide better extensibility and portability compared
to traditional monolithic engines. In this paper, we
revisit the design of layered JavaScript engines and
propose a layered architecture, called MuscalietJS,
that splits the responsibilities of a JavaScript engine
between a high-level, JavaScript-specific component and
a low-level, language-agnostic .NET VM. To make up for
the performance loss due to layering, we propose a
two-pronged approach: high-level JavaScript optimizations
and exploitation of low-level VM features that produce
very efficient code for hot functions. We demonstrate
the validity of the MuscalietJS design through a
comprehensive evaluation using both the SunSpider
benchmarks and a set of web workloads. We demonstrate
that our approach outperforms other layered engines
such as IronJS and Rhino, while providing
extensibility, adaptability and portability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Kalibera:2014:FAS,
author = "Tomas Kalibera and Petr Maj and Floreal Morandat and
Jan Vitek",
title = "A fast abstract syntax tree interpreter for {R}",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "89--102",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576205",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Dynamic languages have been gaining popularity to the
point that their performance is starting to matter. The
effort required to develop a production-quality,
high-performance runtime is, however, staggering and
the expertise required to do so is often out of reach
of the community maintaining a particular language.
Many domain specific languages remain stuck with naive
implementations, as they are easy to write and simple
to maintain for domain scientists. In this paper, we
try to see how far one can push a naive implementation
while remaining portable and not requiring expertise in
compilers and runtime systems. We choose the R
language, a dynamic language used in statistics, as the
target of our experiment and adopt the simplest
possible implementation strategy, one based on
evaluation of abstract syntax trees. We build our
interpreter on top of a Java virtual machine and use
only facilities available to all Java programmers. We
compare our results to other implementations of R.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
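The implementation strategy the paper deliberately adopts, direct evaluation of abstract syntax trees, is the simplest interpreter architecture. A toy Python version for arithmetic expressions (R's semantics are of course far richer):

# Naive AST interpreter: each node is evaluated by structural
# recursion, with no bytecode, specialization, or JIT.

def eval_node(node, env):
    kind = node[0]
    if kind == "num":
        return node[1]
    if kind == "var":
        return env[node[1]]
    if kind == "+":
        return eval_node(node[1], env) + eval_node(node[2], env)
    if kind == "*":
        return eval_node(node[1], env) * eval_node(node[2], env)
    raise ValueError(f"unknown node kind {kind!r}")

ast = ("+", ("*", ("var", "x"), ("num", 2)), ("num", 1))   # x * 2 + 1
print(eval_node(ast, {"x": 20}))       # 41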
@Article{Kedlaya:2014:DDL,
author = "Madhukar N. Kedlaya and Behnam Robatmili and Calin
Cascaval and Ben Hardekopf",
title = "Deoptimization for dynamic language {JITs} on typed,
stack-based virtual machines",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "103--114",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576209",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "We are interested in implementing dynamic language
runtimes on top of language-level virtual machines.
Type specialization is a critical optimization for
dynamic language runtimes: generic code that handles
any type of data is replaced with specialized code for
particular types observed during execution. However,
types can change, and the runtime must recover whenever
unexpected types are encountered. The state-of-the-art
recovery mechanism is called deoptimization.
Deoptimization is a well-known technique for dynamic
language runtimes implemented in low-level languages
like C. However, no dynamic language runtime
implemented on top of a virtual machine such as the
Common Language Runtime (CLR) or the Java Virtual
Machine (JVM) uses deoptimization, because the
implementation thereof used in low-level languages is
not possible. In this paper we propose a novel
technique that enables deoptimization for dynamic
language runtimes implemented on top of typed,
stack-based virtual machines. Our technique does not
require any changes to the underlying virtual machine.
We implement our proposed technique in a JavaScript
language implementation, MCJS, running on top of the
Mono runtime (CLR). We evaluate our implementation
against the current state-of-the-art recovery mechanism
for virtual machine-based runtimes, as implemented both
in MCJS and in IronJS. We show that deoptimization
provides significant performance benefits, even for
runtimes running on top of a virtual machine.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
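The specialize-guard-recover pattern the abstract describes has a simple shape: code compiled under a type assumption guards that assumption and falls back to the generic implementation when it breaks. A Python caricature of the control flow only (MCJS does this on a typed, stack-based VM, not at source level):

# Type specialization with deoptimization: the fast path assumes ints;
# when the guard fails, execution recovers on the generic path.

def generic_add(a, b):
    return a + b                       # handles any operand types

def make_specialized(deopt):
    def int_add(a, b):
        if type(a) is int and type(b) is int:   # the type guard
            return a + b               # specialized fast path
        return deopt(a, b)             # guard failed: deoptimize
    return int_add

add = make_specialized(generic_add)
print(add(2, 3))                       # 5, via the fast path
print(add("a", "b"))                   # 'ab', recovered by deopt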
@Article{Vitek:2014:CTR,
author = "Jan Vitek",
title = "The case for the three {R}'s of systems research:
repeatability, reproducibility and rigor",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "115--116",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576216",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Computer systems research spans sub-disciplines that
include embedded systems, programming languages,
networking, and operating systems. In this talk my
contention is that a number of structural factors
inhibit quality systems research. Symptoms of the
problem include unrepeatable and unreproduced results
as well as results that are either devoid of meaning or
that measure the wrong thing. I will illustrate the
impact of these issues on our research output with
examples from the development and empirical evaluation
of the Schism real-time garbage collection algorithm
that is shipped with the FijiVM --- a Java virtual
machine for embedded and mobile devices. I will argue
that our field should foster: repetition of results,
independent reproduction, as well as rigorous
evaluation. I will outline some baby steps taken by
several computer conferences. In particular I will
focus on the introduction of Artifact Evaluation
Committees or AECs to ECOOP, OOPSLA, PLDI and soon
POPL. The goal of the AECs is to encourage authors to
package the software artifacts that they used to
support the claims made in their paper and to submit
these artifacts for evaluation. AECs were carefully
designed to provide positive feedback to the authors
that take the time to create repeatable research.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Chang:2014:EMV,
author = "Chao-Jui Chang and Jan-Jan Wu and Wei-Chung Hsu and
Pangfeng Liu and Pen-Chung Yew",
title = "Efficient memory virtualization for {Cross-ISA} system
mode emulation",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "117--128",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576201",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Cross-ISA system-mode emulation has many important
applications. For example, Cross-ISA system-mode
emulation helps computer architects and OS developers
trace and debug kernel execution-flow efficiently by
emulating a slower platform (such as ARM) on a more
powerful platform (such as an x86 machine). Cross-ISA
system-mode emulation also enables workload
consolidation in data centers with platforms of
different instruction-set architectures (ISAs).
However, system-mode emulation is much slower. One
major overhead in system-mode emulation is the
multi-level memory address translation that maps guest
virtual address to host physical address. Shadow page
tables (SPT) have been used to reduce such overheads,
but primarily for same-ISA virtualization. In this
paper we propose a novel approach called embedded
shadow page tables (ESPT). ESPT embeds a shadow page
table into the address space of a cross-ISA dynamic
binary translation (DBT) system and uses the hardware
memory management unit in the CPU to translate memory
addresses, instead of the software translation used in
current DBT emulators like QEMU. We also use the larger address
space on modern 64-bit CPUs to accommodate our DBT
emulator so that it will not interfere with the guest
operating system. We incorporate our new scheme into
QEMU, a popular, retargetable cross-ISA system
emulator. SPEC CINT2006 benchmark results indicate that
our technique achieves an average speedup of 1.51 times
in system mode when emulating ARM on x86, and a 1.59
times speedup for emulating IA32 on x86_64.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Zhang:2014:PSS,
author = "Mingwei Zhang and Rui Qiao and Niranjan Hasabnis and
R. Sekar",
title = "A platform for secure static binary instrumentation",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "129--140",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576208",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Program instrumentation techniques form the basis of
many recent software security defenses, including
defenses against common exploits and security policy
enforcement. As compared to source-code
instrumentation, binary instrumentation is easier to
use and more broadly applicable due to the ready
availability of binary code. Two key features needed
for security instrumentations are (a) it should be
applied to all application code, including code
contained in various system and application libraries,
and (b) it should be non-bypassable. So far, dynamic
binary instrumentation (DBI) techniques have provided
these features, whereas static binary instrumentation
(SBI) techniques have lacked them. These features,
combined with ease of use, have made DBI the de facto
choice for security instrumentations. However, DBI
techniques can incur high overheads in several common
usage scenarios, such as application startups,
system-calls, and many real-world applications. We
therefore develop a new platform for secure static
binary instrumentation (PSI) that overcomes these
drawbacks of DBI techniques, while retaining the
security, robustness and ease-of-use features. We
illustrate the versatility of PSI by developing several
instrumentation applications: basic block counting,
shadow stack defense against control-flow hijack and
return-oriented programming attacks, and system call
and library policy enforcement. While being competitive
with the best DBI tools on the CPU-intensive SPEC 2006
benchmarks, PSI provides an order-of-magnitude reduction
in overheads on a collection of real-world
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Lyu:2014:DER,
author = "Yi-Hong Lyu and Ding-Yong Hong and Tai-Yi Wu and
Jan-Jan Wu and Wei-Chung Hsu and Pangfeng Liu and
Pen-Chung Yew",
title = "{DBILL}: an efficient and retargetable dynamic binary
instrumentation framework using {LLVM} backend",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "141--152",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576213",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic Binary Instrumentation (DBI) is a core
technology for building debugging and profiling tools
for application executables. Most state-of-the-art DBI
systems have focused on the same instruction set
architecture (ISA) where the guest binary and the host
binary have the same ISA. It is uncommon to have a
cross-ISA DBI system, such as a system that instruments
ARM executables to run on x86 machines. We believe
cross-ISA DBI systems are increasingly more important,
since ARM executables could be more productively
analyzed on x86 based machines such as commonly
available PCs and servers. In this paper, we present
DBILL, a cross-ISA and retargetable dynamic binary
instrumentation framework that builds on both QEMU and
LLVM. The DBILL framework enables LLVM-based static
instrumentation tools to become DBI ready, and
deployable to different target architectures. Using
address sanitizer and memory sanitizer as
implementation examples, we show DBILL is an efficient,
versatile and easy to use cross-ISA retargetable DBI
framework.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Zheng:2014:CCM,
author = "Jie Zheng and Tze Sing Eugene Ng and Kunwadee
Sripanidkulchai and Zhaolei Liu",
title = "{COMMA}: coordinating the migration of multi-tier
applications",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "153--164",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576200",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Multi-tier applications are widely deployed in today's
virtualized cloud computing environments. At the same
time, management operations in these virtualized
environments, such as load balancing, hardware
maintenance, workload consolidation, etc., often make
use of live virtual machine (VM) migration to control
the placement of VMs. Although existing solutions are
able to migrate a single VM efficiently, little
attention has been devoted to migrating related VMs in
multi-tier applications. Ignoring the relatedness of
VMs during migration can lead to serious application
performance degradation. This paper formulates the
multi-tier application migration problem, and presents
a new communication-impact-driven coordinated approach,
as well as a system called COMMA that realizes this
approach. Through extensive testbed experiments,
numerical analyses, and a demonstration of COMMA on
Amazon EC2, we show that this approach is highly
effective in minimizing migration's impact on
multi-tier applications' performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Kumar:2014:FBE,
author = "Vivek Kumar and Stephen M. Blackburn and David Grove",
title = "Friendly barriers: efficient work-stealing with return
barriers",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "165--176",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576207",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper addresses the problem of efficiently
supporting parallelism within a managed runtime. A
popular approach for exploiting software parallelism on
parallel hardware is task parallelism, where the
programmer explicitly identifies potential parallelism
and the runtime then schedules the work. Work-stealing
is a promising scheduling strategy that a runtime may
use to keep otherwise idle hardware busy while
relieving overloaded hardware of its burden. However,
work-stealing comes with substantial overheads. Recent
work identified sequential overheads of work-stealing,
those that occur even when no stealing takes place, as
a significant source of overhead. That work was able to
reduce sequential overheads to just 15\%. In this work,
we turn to dynamic overheads, those that occur each
time a steal takes place. We show that the dynamic
overhead is dominated by introspection of the victim's
stack when a steal takes place. We exploit the idea of
a low overhead return barrier to reduce the dynamic
overhead by approximately half, resulting in total
performance improvements of as much as 20\%. Because,
                 unlike prior work, we attack the overheads due directly
                 to stealing, and therefore the overheads that
grow as parallelism grows, we improve the scalability
of work-stealing applications. This result is
complementary to recent work addressing the sequential
overheads of work-stealing. This work therefore
substantially relieves work-stealing of the increasing
pressure due to increasing intra-node hardware
parallelism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Horie:2014:SDJ,
author = "Michihiro Horie and Kazunori Ogata and Kiyokuni
Kawachiya and Tamiya Onodera",
title = "String deduplication for {Java}-based middleware in
virtualized environments",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "177--188",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576210",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Increasing memory efficiency in physical servers is a
                 significant concern when increasing the number of
                 virtual machines (VMs) they host. When a similar web
                 application service runs in each guest VM, many strings
                 with the same values are created in every guest VM.
                 These duplicated strings are redundant from the
                 viewpoint of memory efficiency in the host OS. This
                 paper proposes two approaches to reducing string
                 duplication within a single Java VM (JVM) and across
                 JVMs. The first approach shares string objects across
                 JVMs by using a read-only memory-mapped file. The other
                 approach selectively unifies string objects created at
                 runtime by the web applications.
This paper evaluates our approach by using the Apache
DayTrader and the DaCapo benchmark suite. Our prototype
implementation achieved 7\% to 12\% reduction in the
total size of the objects allocated over the lifetime
of the programs. In addition, we observed the
performance of DayTrader was maintained even under a
situation of high density guest VMs in a KVM host
machine.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
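
As a rough illustration of the unification idea in the entry above, the C
sketch below interns strings so that equal values share one canonical
copy. It is a toy: the names are invented, the table is a linear array,
and none of the JVM-level concerns (concurrency, GC, or cross-VM sharing
via a memory-mapped file) are modeled.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Minimal string-interning sketch: equal strings are stored once and
 * callers share the canonical copy, the same basic idea as unifying
 * duplicate string bodies inside a JVM. */
#define TABLE_MAX 1024
static const char *table[TABLE_MAX];
static int n_interned;

const char *intern(const char *s) {
    for (int i = 0; i < n_interned; i++)
        if (strcmp(table[i], s) == 0)
            return table[i];          /* duplicate: share existing copy */
    if (n_interned == TABLE_MAX) { fprintf(stderr, "table full\n"); exit(1); }
    return table[n_interned++] = strdup(s);
}

int main(void) {
    const char *a = intern("GET /trade/quote");
    const char *b = intern("GET /trade/quote");
    printf("deduplicated: %s\n", a == b ? "yes" : "no");
    return 0;
}
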
@Article{Stecklina:2014:SHO,
author = "Julian Stecklina",
title = "Shrinking the hypervisor one subsystem at a time: a
userspace packet switch for virtual machines",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "189--200",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576202",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Efficient and secure networking between virtual
machines is crucial in a time where a large share of
the services on the Internet and in private datacenters
run in virtual machines. To achieve this efficiency,
virtualization solutions, such as Qemu/KVM, move toward
a monolithic system architecture in which all
performance critical functionality is implemented
                 directly in the hypervisor in privileged mode. This
                 creates an attack surface in the hypervisor that
                 compromised VMs can exploit to take over the virtual
                 machine host and all VMs running on it. We show that it is
possible to implement an efficient network switch for
virtual machines as an unprivileged userspace component
running in the host system including the driver for the
upstream network adapter. Our network switch relies on
functionality already present in the KVM hypervisor and
requires no changes to Linux, the host operating
                 system, or to the guest. Our userspace implementation
compares favorably to the existing in-kernel
implementation with respect to throughput and latency.
                 We reduced per-packet overhead by using a
                 run-to-completion model and are able to outperform the
unmodified system for VM-to-VM traffic by a large
margin when packet rates are high.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Li:2014:VSK,
author = "Ye Li and Richard West and Eric Missimer",
title = "A virtualized separation kernel for mixed criticality
systems",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "201--212",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576206",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Multi- and many-core processors are becoming
increasingly popular in embedded systems. Many of these
processors now feature hardware virtualization
capabilities, such as the ARM Cortex A15, and x86
processors with Intel VT-x or AMD-V support. Hardware
virtualization offers opportunities to partition
physical resources, including processor cores, memory
and I/O devices amongst guest virtual machines. Mixed
criticality systems and services can then co-exist on
the same platform in separate virtual machines.
However, traditional virtual machine systems are too
expensive because of the costs of trapping into
hypervisors to multiplex and manage machine physical
resources on behalf of separate guests. For example,
hypervisors are needed to schedule separate VMs on
physical processor cores. In this paper, we discuss the
design of the Quest-V separation kernel, which
partitions services of different criticalities in
separate virtual machines, or sandboxes. Each sandbox
encapsulates a subset of machine physical resources
that it manages without requiring intervention of a
hypervisor. Moreover, a hypervisor is not needed for
normal operation, except to bootstrap the system and
establish communication channels between sandboxes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
@Article{Johnson:2014:CML,
  author =       "David Johnson and Mike Hibler and Eric Eide",
title = "Composable multi-level debugging with {Stackdb}",
journal = j-SIGPLAN,
volume = "49",
number = "7",
pages = "213--226",
month = jul,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2674025.2576212",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:29:50 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Virtual machine introspection (VMI) allows users to
debug software that executes within a virtual machine.
To support rich, whole-system analyses, a VMI tool must
inspect and control systems at multiple levels of the
software stack. Traditional debuggers enable inspection
and control, but they limit users to treating a whole
system as just one kind of target: e.g., just a kernel,
or just a process, but not both. We created Stackdb, a
debugging library with VMI support that allows one to
monitor and control a whole system through multiple,
coordinated targets. A target corresponds to a
particular level of the system's software stack;
multiple targets allow a user to observe a VM guest at
several levels of abstraction simultaneously. For
example, with Stackdb, one can observe a PHP script
running in a Linux process in a Xen VM via three
coordinated targets at the language, process, and
kernel levels. Within Stackdb, higher-level targets are
components that utilize lower-level targets; a key
contribution of Stackdb is its API that supports
multi-level and flexible ``stacks'' of targets. This
paper describes the challenges we faced in creating
Stackdb, presents the solutions we devised, and
evaluates Stackdb through its application to a
security-focused, whole-system case study.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '14 conference proceedings.",
}
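
The notion of stacked targets can be suggested in a few lines of C. The
sketch below is not Stackdb's API; it invents a minimal target struct in
which a process-level target translates a (fake) virtual address and
delegates the actual access to the kernel/VM target beneath it.

#include <stdio.h>
#include <string.h>

/* Sketch of the "stacked targets" idea: each target exposes a small
 * inspection interface, and higher-level targets are implemented on
 * top of lower-level ones. All names here are invented. */
typedef struct target {
    const char *level;
    struct target *below;                       /* next target down */
    int (*read_mem)(struct target *, unsigned long addr,
                    void *buf, int len);
} target;

/* Bottom target: pretend VM physical memory. */
static unsigned char guest_ram[256] = "kernel-data";
static int vm_read(target *t, unsigned long a, void *buf, int len) {
    (void)t; memcpy(buf, guest_ram + a, len); return len;
}

/* Process-level target: translates an address, then delegates the
 * access to the target below it. */
static int proc_read(target *t, unsigned long a, void *buf, int len) {
    unsigned long phys = a - 0x1000;            /* toy page mapping */
    return t->below->read_mem(t->below, phys, buf, len);
}

int main(void) {
    target vm   = { "kernel",  NULL, vm_read };
    target proc = { "process", &vm,  proc_read };
    char buf[12];
    proc.read_mem(&proc, 0x1000, buf, sizeof buf);
    printf("read via %s target: %s\n", proc.level, buf);
    return 0;
}
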
@Article{Hill:2014:CCA,
author = "Mark D. Hill",
title = "21st century computer architecture",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "1--2",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2558890",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This talk has two parts. The first part will discuss
possible directions for computer architecture research,
including architecture as infrastructure, energy first,
impact of new technologies, and cross-layer
opportunities. This part is based on a 2012 Computing
Community Consortium (CCC) whitepaper effort led by
Hill, as well as other recent National Academy and ISAT
studies. See:
\url{http://cra.org/ccc/docs/init/21stcenturyarchitecturewhitepaper.pdf}.
The second part of the talk will discuss one or more
examples of cross-layer research advocated in the
first part. For example, our analysis shows that many
``big-memory'' server workloads, such as databases,
in-memory caches, and graph analytics, pay a high cost
for page-based virtual memory: up to 50\% of execution
time wasted. Via small changes to the operating system
(Linux) and hardware (x86-64 MMU), this work reduces
                 the execution time these workloads waste to less than
0.5\%. The key idea is to map part of a process's
linear virtual address space with a new incarnation of
segmentation, while providing compatibility by mapping
the rest of the virtual address space with paging.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Liu:2014:PPF,
author = "Tongping Liu and Chen Tian and Ziang Hu and Emery D.
Berger",
title = "{PREDATOR}: predictive false sharing detection",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "3--14",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555244",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "False sharing is a notorious problem for multithreaded
applications that can drastically degrade both
performance and scalability. Existing approaches can
precisely identify the sources of false sharing, but
only report false sharing actually observed during
execution; they do not generalize across executions.
Because false sharing is extremely sensitive to object
layout, these detectors can easily miss false sharing
problems that can arise due to slight differences in
memory allocation order or object placement decisions
by the compiler. In addition, they cannot predict the
impact of false sharing on hardware with different
cache line sizes. This paper presents PREDATOR, a
predictive software-based false sharing detector.
PREDATOR generalizes from a single execution to
precisely predict false sharing that is latent in the
current execution. PREDATOR tracks accesses within a
range that could lead to false sharing given different
object placement. It also tracks accesses within
virtual cache lines, contiguous memory ranges that span
actual hardware cache lines, to predict sharing on
hardware platforms with larger cache line sizes. For
each, it reports the exact program location of
predicted false sharing problems, ranked by their
projected impact on performance. We evaluate PREDATOR
across a range of benchmarks and actual applications.
PREDATOR identifies problems undetectable with previous
tools, including two previously-unknown false sharing
problems, with no false positives. PREDATOR is able to
immediately locate false sharing problems in MySQL and
the Boost library that had eluded detection for
years.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
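
A minimal C demonstration of the phenomenon PREDATOR predicts: two
thread-private counters that happen to land on one cache line, versus the
same counters padded to a full line each. The 64-byte line size is an
assumption; run with an argument (e.g. "./a.out pad") to select the
padded layout, and compare wall-clock times. This reproduces the symptom
only; the paper's prediction machinery is not modeled.

#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define ITERS 200000000L
#define LINE 64

struct padded { volatile long v; char pad[LINE - sizeof(long)]; };
static volatile long adjacent[2];        /* likely share one line   */
static struct padded separate[2];        /* one line per counter    */
static volatile long *slot[2];

static void *worker(void *arg) {
    volatile long *c = slot[(long)arg];
    for (long i = 0; i < ITERS; i++) (*c)++;
    return NULL;
}

int main(int argc, char **argv) {
    int pad = argc > 1;                  /* any argument selects padding */
    slot[0] = pad ? &separate[0].v : &adjacent[0];
    slot[1] = pad ? &separate[1].v : &adjacent[1];
    struct timespec t0, t1;
    clock_gettime(CLOCK_MONOTONIC, &t0);
    pthread_t a, b;
    pthread_create(&a, NULL, worker, (void *)0L);
    pthread_create(&b, NULL, worker, (void *)1L);
    pthread_join(a, NULL); pthread_join(b, NULL);
    clock_gettime(CLOCK_MONOTONIC, &t1);
    printf("%s layout: %.2f s\n", pad ? "padded" : "adjacent",
           (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9);
    return 0;
}
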
@Article{Thomson:2014:CTU,
author = "Paul Thomson and Alastair F. Donaldson and Adam
Betts",
title = "Concurrency testing using schedule bounding: an
empirical study",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "15--28",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555260",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the first independent empirical study on
schedule bounding techniques for systematic concurrency
testing (SCT). We have gathered 52 buggy concurrent
software benchmarks, drawn from public code bases,
which we call SCTBench. We applied a modified version
of an existing concurrency testing tool to SCTBench to
attempt to answer several research questions,
including: How effective are the two main schedule
bounding techniques, preemption bounding and delay
bounding, at bug finding? What challenges are
associated with applying SCT to existing code? How
effective is schedule bounding compared to a naive
random scheduler at finding bugs? Our findings confirm
that delay bounding is superior to preemption bounding
and that schedule bounding is more effective at finding
bugs than unbounded depth-first search. The majority of
bugs in SCTBench can be exposed using a small bound
(1-3), supporting previous claims, but there is at
least one benchmark that requires 5 preemptions.
Surprisingly, we found that a naive random scheduler is
at least as effective as schedule bounding for finding
bugs. We have made SCTBench and our tools publicly
available for reproducibility and use in future work.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Samak:2014:TDD,
author = "Malavika Samak and Murali Krishna Ramanathan",
title = "Trace driven dynamic deadlock detection and
reproduction",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "29--42",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555262",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic analysis techniques have been proposed to
detect potential deadlocks. Analyzing and comprehending
each potential deadlock to determine whether the
deadlock is feasible in a real execution requires
significant programmer effort. Moreover, empirical
evidence shows that existing analyses are quite
                 imprecise. This imprecision further voids the manual
                 effort invested in reasoning about non-existent
                 defects. In this paper, we address the
problems of imprecision of existing analyses and the
subsequent manual effort necessary to reason about
deadlocks. We propose a novel approach for deadlock
detection by designing a dynamic analysis that
intelligently leverages execution traces. To reduce the
manual effort, we replay the program by making the
execution follow a schedule derived based on the
observed trace. For a real deadlock, its feasibility is
automatically verified if the replay causes the
execution to deadlock. We have implemented our approach
as part of WOLF and have analyzed many large (upto
160KLoC) Java programs. Our experimental results show
that we are able to identify 74\% of the reported
defects as true (or false) positives automatically
leaving very few defects for manual analysis. The
                 overhead of our approach is negligible, making it a
                 compelling tool for practical adoption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
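
The trace-driven core common to dynamic deadlock detectors can be
sketched compactly. The C fragment below builds a lock-order graph from
acquisition events (an edge L1 -> L2 when a thread takes L2 while holding
L1) and reports cycles as potential deadlocks. It is a generic
illustration, not WOLF's algorithm, and omits the replay step that
confirms a report is a real deadlock.

#include <stdio.h>

#define NLOCKS 4
static int edge[NLOCKS][NLOCKS];    /* lock-order graph from the trace */
static int on_path[NLOCKS], visited[NLOCKS];

/* Standard DFS cycle check: a back edge to a lock on the current
 * path means a cyclic lock order, i.e. a potential deadlock. */
static int has_cycle(int l) {
    if (on_path[l]) return 1;
    if (visited[l]) return 0;
    visited[l] = on_path[l] = 1;
    for (int m = 0; m < NLOCKS; m++)
        if (edge[l][m] && has_cycle(m)) return 1;
    on_path[l] = 0;
    return 0;
}

int main(void) {
    /* trace: thread 1 acquires 0 then 1; thread 2 acquires 1 then 0 */
    edge[0][1] = 1;
    edge[1][0] = 1;
    for (int l = 0; l < NLOCKS; l++)
        if (has_cycle(l)) {
            printf("potential deadlock via lock %d\n", l);
            break;
        }
    return 0;
}
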
@Article{Chiang:2014:ESI,
author = "Wei-Fan Chiang and Ganesh Gopalakrishnan and Zvonimir
Rakamaric and Alexey Solovyev",
title = "Efficient search for inputs causing high
floating-point errors",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "43--52",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555265",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Tools for floating-point error estimation are
fundamental to program understanding and optimization.
In this paper, we focus on tools for determining the
                 input settings to a floating-point routine that
                 maximize its result error. Such tools can help support
activities such as precision allocation, performance
optimization, and auto-tuning. We benchmark current
abstraction-based precision analysis methods, and show
that they often do not work at scale, or generate
highly pessimistic error estimates, often caused by
non-linear operators or complex input constraints that
define the set of legal inputs. We show that while
concrete-testing-based error estimation methods based
on maintaining shadow values at higher precision can
                 search out higher error-inducing inputs, suitable
                 heuristic search guidance is key to finding higher
errors. We develop a heuristic search algorithm called
Binary Guided Random Testing (BGRT). In 45 of the 48
total benchmarks, including many real-world routines,
BGRT returns higher guaranteed errors. We also evaluate
BGRT against two other heuristic search methods called
ILS and PSO, obtaining better results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
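
The shadow-value idea is easy to make concrete. The C sketch below
evaluates a cancellation-prone expression in float against a
double-precision shadow and searches for high-relative-error inputs. It
substitutes plain random search for the paper's Binary Guided Random
Testing, so it shows only the surrounding scaffolding, not BGRT itself;
the function and input range are invented.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* f(x) = (1 - cos x) / x^2 cancels catastrophically in float for
 * small x, while the double shadow stays near the true value 0.5. */
static float  f32(float x)  { return (1.0f - cosf(x)) / (x * x); }
static double f64(double x) { return (1.0 - cos(x)) / (x * x); }

int main(void) {
    srand(42);
    double worst_x = 0, worst_err = 0;
    for (int i = 0; i < 100000; i++) {
        double x = 1e-6 + (rand() / (double)RAND_MAX);   /* (1e-6, 1] */
        double shadow = f64(x);
        double err = fabs((double)f32((float)x) - shadow) / fabs(shadow);
        if (err > worst_err) { worst_err = err; worst_x = x; }
    }
    printf("worst input %.9g, relative error %.3g\n", worst_x, worst_err);
    return 0;
}
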
@Article{Tardieu:2014:XAP,
author = "Olivier Tardieu and Benjamin Herta and David
Cunningham and David Grove and Prabhanjan Kambadur and
Vijay Saraswat and Avraham Shinnar and Mikio Takeuchi
and Mandana Vaziri",
title = "{X10} and {APGAS} at Petascale",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "53--66",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555245",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "X10 is a high-performance, high-productivity
programming language aimed at large-scale distributed
and shared-memory parallel applications. It is based on
the Asynchronous Partitioned Global Address Space
(APGAS) programming model, supporting the same
fine-grained concurrency mechanisms within and across
shared-memory nodes. We demonstrate that X10 delivers
solid performance at petascale by running (weak
scaling) eight application kernels on an IBM Power 775
supercomputer utilizing up to 55,680 Power7 cores (for
1.7 Pflop/s of theoretical peak performance). We detail
our advances in distributed termination detection,
distributed load balancing, and use of high-performance
interconnects that enable X10 to scale out to tens of
thousands of cores. For the four HPC Class 2 Challenge
benchmarks, X10 achieves 41\% to 87\% of the system's
potential at scale (as measured by IBM's HPCC Class 1
optimized runs). We also implement K-Means,
Smith-Waterman, Betweenness Centrality, and Unbalanced
Tree Search (UTS) for geometric trees. Our UTS
implementation is the first to scale to petaflop
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Cunningham:2014:RXE,
author = "David Cunningham and David Grove and Benjamin Herta
and Arun Iyengar and Kiyokuni Kawachiya and Hiroki
Murata and Vijay Saraswat and Mikio Takeuchi and
Olivier Tardieu",
title = "Resilient {X10}: efficient failure-aware programming",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "67--80",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555248",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scale-out programs run on multiple processes in a
cluster. In scale-out systems, processes can fail.
Computations using traditional libraries such as MPI
fail when any component process fails. The advent of
Map Reduce, Resilient Data Sets and MillWheel has shown
dramatic improvements in productivity are possible when
a high-level programming framework handles scale-out
and resilience automatically. We are concerned with the
development of general-purpose languages that support
resilient programming. In this paper we show how the
X10 language and implementation can be extended to
support resilience. In Resilient X10, places may fail
asynchronously, causing loss of the data and tasks at
the failed place. Failure is exposed through
exceptions. We identify a {\em Happens Before
Invariance Principle} and require the runtime to
automatically repair the global control structure of
the program to maintain this principle. We show this
reduces much of the burden of resilient programming.
The programmer is only responsible for continuing
execution with fewer computational resources and the
loss of part of the heap, and can do so while taking
advantage of domain knowledge. We build a complete
implementation of the language, capable of executing
benchmark applications on hundreds of nodes. We
describe the algorithms required to make the language
runtime resilient. We then give three applications,
each with a different approach to fault tolerance
(replay, decimation, and domain-level checkpointing).
These can be executed at scale and survive node
failure. We show that for these programs the overhead
of resilience is a small fraction of overall runtime by
comparing to equivalent non-resilient X10 programs. On
one program we show end-to-end performance of Resilient
X10 is ~100x faster than Hadoop.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Yang:2014:PMI,
author = "Chaoran Yang and Wesley Bland and John Mellor-Crummey
and Pavan Balaji",
title = "Portable, {MPI}-interoperable {Coarray Fortran}",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "81--92",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555270",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The past decade has seen the advent of a number of
parallel programming models such as Coarray Fortran
(CAF), Unified Parallel C, X10, and Chapel. Despite the
productivity gains promised by these models, most
parallel scientific applications still rely on MPI as
their data movement model. One reason for this trend is
that it is hard for users to incrementally adopt these
new programming models in existing MPI applications.
                 Because each model uses its own runtime system, they
duplicate resources and are potentially error-prone.
Such independent runtime systems were deemed necessary
because MPI was considered insufficient in the past to
play this role for these languages. The recently
released MPI-3, however, adds several new capabilities
that now provide all of the functionality needed to act
as a runtime, including a much more comprehensive
one-sided communication framework. In this paper, we
investigate how MPI-3 can form a runtime system for one
example programming model, CAF, with a broader goal of
enabling a single application to use both MPI and CAF
with the highest level of interoperability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
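
A flavor of MPI-3 one-sided communication serving as a PGAS runtime, in
C: the sketch below performs a coarray-style remote assignment with
MPI_Win_create, MPI_Put, and fence synchronization (run with at least two
ranks). A real CAF runtime layers synchronization modes, progress, and
memory registration on top; this shows only the primitive being relied
on.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double a[4] = {0};                          /* the "coarray" slice */
    MPI_Win win;
    MPI_Win_create(a, sizeof a, sizeof(double),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    MPI_Win_fence(0, win);
    if (rank == 0) {                 /* roughly: a(3)[2] = 3.14 in CAF */
        double v = 3.14;
        MPI_Put(&v, 1, MPI_DOUBLE, /*target=*/1, /*disp=*/2, 1,
                MPI_DOUBLE, win);
    }
    MPI_Win_fence(0, win);

    if (rank == 1) printf("image 1 sees a[2] = %g\n", a[2]);
    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}
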
@Article{Yang:2014:CNR,
author = "Yi Yang and Huiyang Zhou",
title = "{CUDA-NP}: realizing nested thread-level parallelism
in {GPGPU} applications",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "93--106",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555254",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parallel programs consist of series of code sections
with different thread-level parallelism (TLP). As a
result, it is rather common that a thread in a parallel
program, such as a GPU kernel in CUDA programs, still
contains both sequential code and parallel loops. In
order to leverage such parallel loops, the latest
Nvidia Kepler architecture introduces dynamic
parallelism, which allows a GPU thread to start another
GPU kernel, thereby reducing the overhead of launching
kernels from a CPU. However, with dynamic parallelism,
a parent thread can only communicate with its child
threads through global memory and the overhead of
launching GPU kernels is non-trivial even within GPUs.
In this paper, we first study a set of GPGPU benchmarks
that contain parallel loops, and highlight that these
                 benchmarks do not have a very high loop count or high
degrees of TLP. Consequently, the benefits of
leveraging such parallel loops using dynamic
parallelism are too limited to offset its overhead. We
then present our proposed solution to exploit nested
parallelism in CUDA, referred to as CUDA-NP. With
CUDA-NP, we initially enable a high number of threads
when a GPU program starts, and use control flow to
activate different numbers of threads for different
code sections. We implemented our proposed CUDA-NP
framework using a directive-based compiler approach.
For a GPU kernel, an application developer only needs
to add OpenMP-like pragmas for parallelizable code
sections. Then, our CUDA-NP compiler automatically
generates the optimized GPU kernels. It supports both
the reduction and the scan primitives, explores
different ways to distribute parallel loop iterations
                 into threads, and efficiently manages on-chip resources.
Our experiments show that for a set of GPGPU
benchmarks, which have already been optimized and
                 contain nested parallelism, our proposed CUDA-NP
framework further improves the performance by up to
6.69 times and 2.18 times on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Yan:2014:YYA,
author = "Shengen Yan and Chao Li and Yunquan Zhang and Huiyang
Zhou",
title = "{yaSpMV}: yet another {SpMV} framework on {GPUs}",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "107--118",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555255",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "SpMV is a key linear algebra algorithm and has been
widely used in many important application domains. As a
result, numerous attempts have been made to optimize
SpMV on GPUs to leverage their massive computational
throughput. Although the previous work has shown
                 impressive progress, load imbalance and high memory
                 bandwidth demand remain the critical performance bottlenecks
for SpMV. In this paper, we present our novel solutions
to these problems. First, we devise a new SpMV format,
called blocked compressed common coordinate (BCCOO),
which uses bit flags to store the row indices in a
blocked common coordinate (COO) format so as to
alleviate the bandwidth problem. We further improve
this format by partitioning the matrix into vertical
slices to enhance the cache hit rates when accessing
the vector to be multiplied. Second, we revisit the
segmented scan approach for SpMV to address the load
imbalance problem. We propose a highly efficient
matrix-based segmented sum/scan for SpMV and further
improve it by eliminating global synchronization. Then,
we introduce an auto-tuning framework to choose
optimization parameters based on the characteristics of
input sparse matrices and target hardware platforms.
Our experimental results on GTX680 GPUs and GTX480 GPUs
show that our proposed framework achieves significant
performance improvement over the vendor tuned CUSPARSE
V5.0 (up to 229\% and 65\% on average on GTX680 GPUs,
up to 150\% and 42\% on average on GTX480 GPUs) and
                 some recently proposed schemes (e.g., up to 195\%
and 70\% on average over clSpMV on GTX680 GPUs, up to
162\% and 40\% on average over clSpMV on GTX480
GPUs).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
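
The bit-flag row encoding can be shown in scalar form. The C sketch below
stores one stop bit per nonzero in place of explicit row indices and
computes y = Ax with a sequential segmented sum; the exact BCCOO layout,
blocking, vertical slicing, and the parallel GPU scan are the paper's
contribution and are not reproduced here.

#include <stdio.h>

int main(void) {
    /* A = [10 0 0; 0 20 30; 0 0 40], nonzeros in row-major COO order */
    double val[]  = { 10, 20, 30, 40 };
    int    col[]  = {  0,  1,  2,  2 };
    int    stop[] = {  1,  0,  1,  1 };  /* 1 = row ends after this nnz */
    double x[] = { 1, 2, 3 }, y[3];

    int row = 0; double acc = 0;
    for (int k = 0; k < 4; k++) {
        acc += val[k] * x[col[k]];       /* accumulate within a segment */
        if (stop[k]) { y[row++] = acc; acc = 0; }
    }
    for (int r = 0; r < 3; r++) printf("y[%d] = %g\n", r, y[r]);
    return 0;                            /* expect y = 10, 130, 120 */
}
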
@Article{Bauer:2014:SLW,
author = "Michael Bauer and Sean Treichler and Alex Aiken",
title = "{Singe}: leveraging warp specialization for high
performance on {GPUs}",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "119--130",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555258",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Singe, a Domain Specific Language (DSL)
compiler for combustion chemistry that leverages warp
specialization to produce high performance code for
GPUs. Instead of relying on traditional GPU programming
models that emphasize data-parallel computations, warp
specialization allows compilers like Singe to partition
computations into sub-computations which are then
assigned to different warps within a thread block.
Fine-grain synchronization between warps is performed
efficiently in hardware using producer-consumer named
barriers. Partitioning computations using warp
specialization allows Singe to deal efficiently with
the irregularity in both data access patterns and
computation. Furthermore, warp-specialized partitioning
of computations allows Singe to fit extremely large
working sets into on-chip memories. Finally, we
describe the architecture and general compilation
techniques necessary for constructing a
warp-specializing compiler. We show that the
warp-specialized code emitted by Singe is up to 3.75X
faster than previously optimized data-parallel GPU
kernels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Odaira:2014:EGI,
author = "Rei Odaira and Jose G. Castanos and Hisanobu Tomari",
title = "Eliminating global interpreter locks in {Ruby} through
hardware transactional memory",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "131--142",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555247",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many scripting languages use a Global Interpreter Lock
(GIL) to simplify the internal designs of their
interpreters, but this kind of lock severely lowers the
multi-thread performance on multi-core machines. This
paper presents our first results eliminating the GIL in
Ruby using Hardware Transactional Memory (HTM) in the
IBM zEnterprise EC12 and Intel 4th Generation Core
processors. Though prior prototypes replaced a GIL with
HTM, we tested realistic programs, the Ruby NAS
Parallel Benchmarks (NPB), the WEBrick HTTP server, and
Ruby on Rails. We devised a new technique to
dynamically adjust the transaction lengths on a
per-bytecode basis, so that we can optimize the
likelihood of transaction aborts against the relative
overhead of the instructions to begin and end the
transactions. Our results show that HTM achieved 1.9-
to 4.4-fold speedups in the NPB programs over the GIL
with 12 threads, and 1.6- and 1.2-fold speedups in
WEBrick and Ruby on Rails, respectively. The dynamic
transaction-length adjustment chose the best
transaction lengths for any number of threads and
applications with sufficiently long running times.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
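
The overall shape of GIL elision with hardware TM can be sketched with
Intel RTM intrinsics in C (assumptions: TSX-capable hardware and a
compiler accepting -mrtm). A bytecode-sized chunk runs inside a
transaction that subscribes to the lock word; on abort it falls back to
the real lock. The paper's key technique, adjusting transaction length
per bytecode, is not modeled.

#include <immintrin.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t gil = PTHREAD_MUTEX_INITIALIZER;
static volatile int gil_held = 0;
static long counter;

static void interpret_chunk(void) { counter++; }   /* stand-in work */

static void run_chunk(void) {
    unsigned st = _xbegin();
    if (st == _XBEGIN_STARTED) {
        if (gil_held) _xabort(0xff);  /* lock taken: abort, fall back */
        interpret_chunk();
        _xend();
        return;
    }
    pthread_mutex_lock(&gil);         /* fallback path */
    gil_held = 1;
    interpret_chunk();
    gil_held = 0;
    pthread_mutex_unlock(&gil);
}

int main(void) {
    for (int i = 0; i < 1000; i++) run_chunk();
    printf("counter = %ld\n", counter);
    return 0;
}
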
@Article{Petrovic:2014:LHM,
author = "Darko Petrovi{\'c} and Thomas Ropars and Andr{\'e}
Schiper",
title = "Leveraging hardware message passing for efficient
thread synchronization",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "143--154",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555251",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As the level of parallelism in manycore processors
keeps increasing, providing efficient mechanisms for
thread synchronization in concurrent programs is
becoming a major concern. On cache-coherent
shared-memory processors, synchronization efficiency is
ultimately limited by the performance of the underlying
cache coherence protocol. This paper studies how
hardware support for message passing can improve
synchronization performance. Considering the ubiquitous
problem of mutual exclusion, we adapt two
state-of-the-art solutions used on shared-memory
processors, namely the server approach and the
combining approach, to leverage the potential of
hardware message passing. We propose HybComb, a novel
combining algorithm that uses both message passing and
shared memory features of emerging hybrid processors.
We also introduce MP-Server, a straightforward
adaptation of the server approach to hardware message
passing. Evaluation on Tilera's TILE-Gx processor shows
that MP-Server can execute contended critical sections
with unprecedented throughput, as stalls related to
cache coherence are removed from the critical path.
HybComb can achieve comparable performance, while
avoiding the need to dedicate server cores.
Consequently, our queue and stack implementations,
based on MP-Server and HybComb, largely outperform
their most efficient pure-shared-memory counterparts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
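
The server approach can be emulated in portable C by standing in a pipe
for hardware message passing: client threads ship critical-section
requests to one dedicated server thread, so the protected data stays in
that core's cache instead of bouncing under the coherence protocol. This
is an analogy for exposition, not the paper's TILE-Gx implementation.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static int req_pipe[2];
static long shared_counter;

static void *server(void *arg) {
    (void)arg;
    int msg;
    while (read(req_pipe[0], &msg, sizeof msg) == sizeof msg) {
        if (msg < 0) break;          /* shutdown message */
        shared_counter += msg;       /* the critical section */
    }
    return NULL;
}

static void *client(void *arg) {
    (void)arg;
    int one = 1;
    for (int i = 0; i < 100000; i++)
        write(req_pipe[1], &one, sizeof one);
    return NULL;
}

int main(void) {
    pipe(req_pipe);
    pthread_t s, c1, c2;
    pthread_create(&s, NULL, server, NULL);
    pthread_create(&c1, NULL, client, NULL);
    pthread_create(&c2, NULL, client, NULL);
    pthread_join(c1, NULL); pthread_join(c2, NULL);
    int stop = -1;
    write(req_pipe[1], &stop, sizeof stop);
    pthread_join(s, NULL);
    printf("counter = %ld (expect 200000)\n", shared_counter);
    return 0;
}
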
@Article{Herlihy:2014:WSF,
author = "Maurice Herlihy and Zhiyu Liu",
title = "Well-structured futures and cache locality",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "155--166",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555257",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In fork-join parallelism, a sequential program is
split into a directed acyclic graph of tasks linked by
directed dependency edges, and the tasks are executed,
possibly in parallel, in an order consistent with their
dependencies. A popular and effective way to extend
fork-join parallelism is to allow threads to create
                 {\em futures}. A thread creates a future to hold the results
                 of a computation, which may or may not be executed in
                 parallel. That result is returned when some thread
                 touches that future, blocking if necessary until the
                 result is ready. Recent research has shown that while
                 futures can, of course, enhance parallelism in a
                 structured way, they can have a deleterious effect on
                 cache locality. In the worst case, futures can incur
                 $\Omega(P T_\infty + t T_\infty)$ deviations, which
                 implies $\Omega(C P T_\infty + C t T_\infty)$
                 additional cache misses, where $C$ is the number of
                 cache lines, $P$ is the number of processors, $t$ is
                 the number of touches, and $T_\infty$ is the
                 computation span. Since cache locality has a large
                 impact on software performance on modern multicores,
                 this result is troubling. In this paper, however, we
                 show that if futures are used in a simple, disciplined
                 way, then the situation is much better: if each future
                 is touched only once, either by the thread that created
                 it, or by a later descendant of the thread that created
                 it, then parallel executions with work stealing can
                 incur at most $O(C P T_\infty^2)$ additional cache
                 misses, a substantial improvement. This structured use
                 of futures is characteristic of many (but not all)
                 parallel applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Diegues:2014:TWL,
author = "Nuno Diegues and Paolo Romano",
title = "{Time-Warp}: lightweight abort minimization in
transactional memory",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "167--178",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555259",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The notion of permissiveness in Transactional Memory
(TM) translates to only aborting a transaction when it
                 cannot be accepted in any history that guarantees the
                 correctness criterion. This property is neglected by
                 most TMs, which, in order to maximize the
                 implementation's efficiency, resort to aborting transactions under
overly conservative conditions. In this paper we seek
to identify a sweet spot between permissiveness and
efficiency by introducing the Time-Warp Multi-version
algorithm (TWM). TWM is based on the key idea of
allowing an update transaction that has performed stale
reads (i.e., missed the writes of concurrently
committed transactions) to be serialized by committing
it in the past, which we call a time-warp commit. At
its core, TWM uses a novel, lightweight validation
                 mechanism with little computational overhead. TWM also
guarantees that read-only transactions can never be
aborted. Further, TWM guarantees Virtual World
Consistency, a safety property that is deemed as
particularly relevant in the context of TM. We
demonstrate the practicality of this approach through
an extensive experimental study, where we compare TWM
with four other TMs, and show an average performance
improvement of 65\% in high concurrency scenarios.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Olukotun:2014:BPP,
author = "Kunle Olukotun",
title = "Beyond parallel programming with domain specific
languages",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "179--180",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2557966",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today, almost all computer architectures are parallel
and heterogeneous; a combination of multiple CPUs, GPUs
and specialized processors. This creates a challenging
problem for application developers who want to develop
high performance programs without the effort required
to use low-level, architecture specific parallel
programming models (e.g., OpenMP for CMPs, CUDA for
GPUs, MPI for clusters). Domain-specific languages
(DSLs) are a promising solution to this problem because
they can provide an avenue for high-level
application-specific abstractions with implicit
parallelism to be mapped directly to low level
architecture-specific programming models; providing
both high programmer productivity and high execution
performance. In this talk I will describe an approach
to building high performance DSLs, which is based on
DSL embedding in a general purpose programming
language, metaprogramming and a DSL infrastructure
called Delite. I will describe how we transform DSL
programs into efficient first-order low-level code
using domain specific optimization, parallelism and
locality optimization with parallel patterns, and
architecture-specific code generation. All
optimizations and transformations are implemented in
                 Delite: an extensible DSL compiler infrastructure that
significantly reduces the effort required to develop
new DSLs. Delite DSLs for machine learning, data
querying, graph analysis, and scientific computing all
achieve performance competitive with manually
parallelized C++ code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Song:2014:DAT,
author = "Sukhyun Song and Jeffrey K. Hollingsworth",
title = "Designing and auto-tuning parallel {$3$-D FFT} for
computation-communication overlap",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "181--192",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555249",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a method to design and auto-tune a
new parallel 3-D FFT code using the non-blocking MPI
all-to-all operation. We achieve high performance by
optimizing computation-communication overlap. Our code
performs fully asynchronous communication without any
support from special hardware. We also improve cache
performance through loop tiling. To cope with the
complex trade-off regarding our optimization
techniques, we parameterize our code and auto-tune the
parameters efficiently in a large parameter space.
Experimental results from two systems confirm that our
code achieves a speedup of up to 1.76x over the FFTW
library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
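
The overlap pattern being auto-tuned reduces to a standard MPI-3 idiom,
sketched below in C: start a non-blocking all-to-all on one block,
compute on another block while the exchange is in flight, then wait.
Buffer sizes and the stand-in compute kernel are placeholders; run with
at least two ranks.

#include <mpi.h>
#include <stdio.h>

#define N 1024

static void compute_on(double *b) {              /* stand-in FFT work */
    for (int i = 0; i < N; i++) b[i] = b[i] * 0.5 + 1.0;
}

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    double send[N], recv[N], other[N];
    for (int i = 0; i < N; i++) send[i] = other[i] = i;

    MPI_Request req;
    MPI_Ialltoall(send, N / size, MPI_DOUBLE,
                  recv, N / size, MPI_DOUBLE, MPI_COMM_WORLD, &req);
    compute_on(other);               /* overlap: work while data moves */
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    printf("exchange plus overlapped compute done\n");
    MPI_Finalize();
    return 0;
}
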
@Article{Catanzaro:2014:DPM,
author = "Bryan Catanzaro and Alexander Keller and Michael
Garland",
title = "A decomposition for in-place matrix transposition",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "193--206",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555253",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a decomposition for in-place matrix
transposition, with applications to Array of Structures
memory accesses on SIMD processors. Traditional
approaches to in-place matrix transposition involve
cycle following, which is difficult to parallelize, and
on matrices of dimension $m$ by $n$ require $O(mn \log
mn)$ work when limited to less than $O(mn)$ auxiliary
space. Our decomposition allows the rows and columns to
be operated on independently during in-place
transposition, reducing work complexity to $O(mn)$,
given $O(\max(m, n))$ auxiliary space. This
decomposition leads to an efficient and naturally
parallel algorithm: we have measured median throughput
of 19.5 GB/s on an NVIDIA Tesla K20c processor. An
implementation specialized for the skinny matrices that
arise when converting Arrays of Structures to
Structures of Arrays yields median throughput of 34.3
GB/s, and a maximum throughput of 51 GB/s. Because of
the simple structure of this algorithm, it is
particularly suited for implementation using SIMD
instructions to transpose the small arrays that arise
when SIMD processors load from or store to Arrays of
Structures. Using this algorithm to cooperatively
perform accesses to Arrays of Structures, we measure
180 GB/s throughput on the K20c, which is up to 45
times faster than compiler-generated Array of
Structures accesses. In this paper, we explain the
algorithm, prove its correctness and complexity, and
explain how it can be instantiated efficiently for
solving various transpose problems on both CPUs and
GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
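
For contrast with the paper's decomposition, the C sketch below
implements the traditional cycle-following in-place transpose it improves
upon: element k of an m-by-n row-major matrix moves to position
(k*m) mod (mn-1), and each permutation cycle is chased to completion. The
serial dependence along each cycle is precisely what resists
parallelization.

#include <stdio.h>

static void transpose(double *a, int m, int n) {
    int len = m * n, last = len - 1;
    unsigned char done[64] = {0};           /* fits this small demo */
    for (int start = 1; start < last; start++) {
        if (done[start]) continue;
        int k = start;
        double carry = a[k];                /* value leaving slot k */
        do {
            int next = (int)((long)k * m % last);
            double tmp = a[next];
            a[next] = carry;                /* drop value at its home */
            carry = tmp;
            done[k] = 1;
            k = next;
        } while (k != start);
    }
}

int main(void) {
    double a[6] = { 1, 2, 3, 4, 5, 6 };     /* 2 x 3, row-major */
    transpose(a, 2, 3);
    for (int i = 0; i < 6; i++) printf("%g ", a[i]); /* 1 4 2 5 3 6 */
    printf("\n");
    return 0;
}
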
@Article{Sung:2014:PTR,
author = "I-Jui Sung and Juan G{\'o}mez-Luna and Jos{\'e}
Mar{\'\i}a Gonz{\'a}lez-Linares and Nicol{\'a}s Guil
and Wen-Mei W. Hwu",
title = "In-place transposition of rectangular matrices on
accelerators",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "207--218",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555266",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Matrix transposition is an important algorithmic
building block for many numeric algorithms such as FFT.
It has also been used to convert the storage layout of
arrays. With more and more algebra libraries offloaded
to GPUs, a high performance in-place transposition
becomes necessary. Intuitively, in-place transposition
should be a good fit for GPU architectures due to
limited available on-board memory capacity and high
throughput. However, direct application of CPU in-place
transposition algorithms lacks the amount of
parallelism and locality required by GPUs to achieve
good performance. In this paper we present the first
known in-place matrix transposition approach for the
GPUs. Our implementation is based on a novel 3-stage
transposition algorithm where each stage is performed
                 using an elementary tile-wise transposition.
Additionally, when transposition is done as part of the
memory transfer between GPU and host, our staged
approach allows hiding transposition overhead by
overlap with PCIe transfer. We show that the 3-stage
algorithm allows larger tiles and achieves 3X speedup
over a traditional 4-stage algorithm, with both
algorithms based on our high-performance elementary
transpositions on the GPU. We also show our proposed
low-level optimizations improve the sustained
throughput to more than 20 GB/s. Finally, we propose an
asynchronous execution scheme that allows CPU threads
to delegate in-place matrix transposition to GPU,
achieving a throughput of more than 3.4 GB/s (including
data transfers costs), and improving current
multithreaded implementations of in-place transposition
on CPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Maleki:2014:PDP,
author = "Saeed Maleki and Madanlal Musuvathi and Todd
Mytkowicz",
title = "Parallelizing dynamic programming through rank
convergence",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "219--232",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555264",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper proposes an efficient parallel algorithm
for an important class of dynamic programming problems
that includes Viterbi, Needleman-Wunsch,
Smith-Waterman, and Longest Common Subsequence. In
dynamic programming, the subproblems that do not depend
on each other, and thus can be computed in parallel,
form stages or wavefronts. The algorithm presented in
this paper provides additional parallelism allowing
multiple stages to be computed in parallel despite
dependences among them. The correctness and the
                 performance of the algorithm rely on rank convergence
properties of matrix multiplication in the tropical
semiring, formed with plus as the multiplicative
operation and max as the additive operation. This paper
demonstrates the efficiency of the parallel algorithm
by showing significant speed ups on a variety of
important dynamic programming problems. In particular,
                 the parallel Viterbi decoder is up to 24x faster (with
64 processors) than a highly optimized commercial
baseline.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
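
The tropical-semiring product underlying the method is simple to state in
C: "multiply" is addition and "add" is max. The sketch below composes two
DP stages as one max-plus matrix product over an invented 3-node example;
the rank-convergence machinery that makes speculative parallel stages
cheap to fix up is not shown.

#include <stdio.h>

#define N 3
#define NEG_INF (-1e18)

/* C = A (x) B in the max-plus semiring: C[i][j] = max_k A[i][k]+B[k][j] */
static void maxplus(const double A[N][N], const double B[N][N],
                    double C[N][N]) {
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++) {
            double best = NEG_INF;
            for (int k = 0; k < N; k++)
                if (A[i][k] + B[k][j] > best)
                    best = A[i][k] + B[k][j];
            C[i][j] = best;
        }
}

int main(void) {
    double A[N][N] = {{0, 2, NEG_INF}, {NEG_INF, 0, 3}, {1, NEG_INF, 0}};
    double C[N][N];
    maxplus(A, A, C);           /* two DP steps composed in one product */
    for (int i = 0; i < N; i++, printf("\n"))
        for (int j = 0; j < N; j++) printf("%10.1f", C[i][j]);
    return 0;
}
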
@Article{Mehta:2014:RLF,
author = "Sanyam Mehta and Pei-Hung Lin and Pen-Chung Yew",
title = "Revisiting loop fusion in the polyhedral framework",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "233--246",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555250",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Loop fusion is an important compiler optimization for
improving memory hierarchy performance through enabling
data reuse. Traditional compilers have approached loop
fusion in a manner decoupled from other high-level loop
optimizations, missing several interesting solutions.
Recently, the polyhedral compiler framework with its
ability to compose complex transformations, has proved
to be promising in performing loop optimizations for
small programs. However, our experiments with large
programs using state-of-the-art polyhedral compiler
frameworks reveal suboptimal fusion partitions in the
                 transformed code. We trace the reason for this to the
                 lack of an effective cost model for choosing a good
                 fusion partitioning among the possible choices, whose
                 number increases exponentially with the number of program statements. In
this paper, we propose a fusion algorithm to choose
good fusion partitions with two objective functions ---
achieving good data reuse and preserving parallelism
inherent in the source code. These objectives, although
targeted by previous work in traditional compilers,
pose new challenges within the polyhedral compiler
framework and have thus not been addressed. In our
algorithm, we propose several heuristics that work
effectively within the polyhedral compiler framework
and allow us to achieve the proposed objectives.
Experimental results show that our fusion algorithm
achieves performance comparable to the existing
polyhedral compilers for small kernel programs, and
significantly outperforms them for large benchmark
programs such as those in the SPEC benchmark suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
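
A two-line example shows the reuse fusion buys. In the C sketch below,
the unfused version writes b to memory in one loop and re-reads it in the
next; the fused version consumes each b[i] while it is still hot in a
register or cache. Scaling this decision to whole programs, while
preserving parallelism, is the cost-model problem the paper tackles.

#include <stdio.h>

#define N 1000000
static double a[N], b[N], c[N];

static void unfused(void) {
    for (int i = 0; i < N; i++) b[i] = 2.0 * a[i];
    for (int i = 0; i < N; i++) c[i] = b[i] + 1.0;  /* b re-read from memory */
}

static void fused(void) {
    for (int i = 0; i < N; i++) {
        b[i] = 2.0 * a[i];
        c[i] = b[i] + 1.0;                          /* b[i] still hot */
    }
}

int main(void) {
    for (int i = 0; i < N; i++) a[i] = i;
    unfused();
    fused();
    printf("c[N-1] = %g\n", c[N - 1]);
    return 0;
}
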
@Article{Rodrigues:2014:TPS,
author = "Christopher Rodrigues and Thomas Jablin and Abdul
Dakkak and Wen-Mei Hwu",
title = "{Triolet}: a programming system that unifies
algorithmic skeleton interfaces for high-performance
cluster computing",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "247--258",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555268",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Functional algorithmic skeletons promise a high-level
programming interface for distributed-memory clusters
                 that frees developers from concerns of task
decomposition, scheduling, and communication.
Unfortunately, prior distributed functional skeleton
frameworks do not deliver performance comparable to
that achievable in a low-level distributed programming
model such as C with MPI and OpenMP, even when used in
concert with high-performance array libraries. There
are several causes: they do not take advantage of
shared memory on each cluster node; they impose a fixed
partitioning strategy on input data; and they have
limited ability to fuse loops involving skeletons that
produce a variable number of outputs per input. We
address these shortcomings in the Triolet programming
language through a modular library design that
separates concerns of parallelism, loop nesting, and
data partitioning. We show how Triolet substantially
improves the parallel performance of algorithms
involving array traversals and nested, variable-size
loops over what is achievable in Eden, a distributed
variant of Haskell. We further demonstrate how Triolet
can substantially simplify parallel programming
relative to C with MPI and OpenMP while achieving
23--100\% of its performance on a 128-core cluster.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Liu:2014:TAP,
author = "Xu Liu and John Mellor-Crummey",
title = "A tool to analyze the performance of multithreaded
programs on {NUMA} architectures",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "259--272",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555271",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Almost all of today's microprocessors contain memory
controllers and directly attach to memory. Modern
multiprocessor systems support non-uniform memory
access (NUMA): it is faster for a microprocessor to
access memory that is directly attached than it is to
access memory attached to another processor. Without
careful distribution of computation and data, a
multithreaded program running on such a system may have
high average memory access latency. To use
multiprocessor systems efficiently, programmers need
performance tools to guide the design of NUMA-aware
codes. To address this need, we enhanced the HPCToolkit
performance tools to support measurement and analysis
of performance problems on multiprocessor systems with
multiple NUMA domains. With these extensions,
HPCToolkit helps pinpoint, quantify, and analyze NUMA
bottlenecks in executions of multithreaded programs. It
computes derived metrics to assess the severity of
bottlenecks, analyzes memory accesses, and provides a
wealth of information to guide NUMA optimization,
including information about how to distribute data to
reduce access latency and minimize contention. This
paper describes the design and implementation of our
extensions to HPCToolkit. We demonstrate their utility
by describing case studies in which we use these
capabilities to diagnose NUMA bottlenecks in four
multithreaded applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
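
A typical fix such a profiler points to is first-touch-aware
initialization, sketched in C with OpenMP below (assuming a Linux-style
first-touch page placement policy and compilation with -fopenmp):
initializing the array with the same static schedule as the compute loop
places pages near the threads that later use them, cutting remote
accesses.

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#define N 10000000

int main(void) {
    double *a = malloc(N * sizeof *a);
    if (!a) return 1;

    /* NUMA-aware first touch: same distribution as the compute loop,
     * so each page lands on the domain of the thread that uses it. */
    #pragma omp parallel for schedule(static)
    for (long i = 0; i < N; i++) a[i] = 0.0;

    double sum = 0.0;
    #pragma omp parallel for schedule(static) reduction(+:sum)
    for (long i = 0; i < N; i++) { a[i] += i; sum += a[i]; }

    printf("sum = %g (threads: %d)\n", sum, omp_get_max_threads());
    free(a);
    return 0;
}
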
@Article{Rao:2014:TFE,
author = "Jia Rao and Xiaobo Zhou",
title = "Towards fair and efficient {SMP} virtual machine
scheduling",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "273--286",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555246",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "As multicore processors become prevalent in modern
computer systems, there is a growing need for
increasing hardware utilization and exploiting the
parallelism of such platforms. With virtualization
technology, hardware utilization is improved by
encapsulating independent workloads into virtual
machines (VMs) and consolidating them onto the same
machine. SMP virtual machines have been widely adopted
to exploit parallelism. For virtualized systems, such
as a public cloud, fairness between tenants and the
efficiency of running their applications are keys to
success. However, we find that existing virtualization
platforms fail to enforce fairness between VMs with
different numbers of virtual CPUs (vCPUs) that run on
multiple physical CPUs. We attribute the unfairness to
the use of per-CPU schedulers and to load imbalance on
these CPUs, which together cause inaccurate CPU
allocations.
Unfortunately, existing approaches to reduce
unfairness, e.g., dynamic load balancing and CPU
capping, introduce significant inefficiencies to
parallel workloads. In this paper, we present Flex, a
vCPU scheduling scheme that enforces fairness at the
VM level and improves the efficiency of hosted parallel
applications. Flex centers on two key designs: (1)
dynamically adjusting vCPU weights (FlexW) on multiple
CPUs to achieve VM-level fairness and (2) flexibly
scheduling vCPUs (FlexS) to minimize wasted
busy-waiting time. We have implemented Flex in Xen and
performed comprehensive evaluations with various
parallel workloads. Results show that Flex is able to
achieve CPU allocations with on average no more than
5\% error compared to the ideal fair allocation.
Further, Flex outperforms Xen's credit scheduler and
two representative co-scheduling approaches by as much
as $ 10 \times $ for parallel applications using
busy-waiting or blocking synchronization methods.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Lu:2014:EDM,
author = "Kai Lu and Xu Zhou and Tom Bergan and Xiaoping Wang",
title = "Efficient deterministic multithreading without global
barriers",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "287--300",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555252",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multithreaded programs execute nondeterministically on
conventional architectures and operating systems. This
complicates many tasks, including debugging and
testing. Deterministic multithreading (DMT) makes the
output of a multithreaded program depend only on its
inputs, eliminating this source of nondeterminism.
However, current DMT implementations suffer from a
common inefficiency: they use frequent global barriers
to enforce a deterministic ordering on memory accesses.
In this paper, we eliminate that inefficiency using an
execution model we call deterministic lazy release
consistency (DLRC). Our execution model uses the Kendo
algorithm to enforce a deterministic ordering on
synchronization, and it uses a deterministic version of
the lazy release consistency memory model to propagate
memory updates across threads. Our approach guarantees
that programs execute deterministically even when they
contain data races. We implemented a DMT system based
on these ideas (RFDet) and evaluated it using 16
parallel applications. Our implementation targets C/C++
programs that use POSIX threads. Results show that
RFDet achieves nearly a $ 2 \times $ speedup over
DThreads, a state-of-the-art DMT system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Eslamimehr:2014:RDS,
author = "Mahdi Eslamimehr and Jens Palsberg",
title = "Race directed scheduling of concurrent programs",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "301--314",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555263",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Detection of data races in Java programs remains a
difficult problem. The best static techniques produce
many false positives, and even the best dynamic
techniques leave room for improvement. We present a new
technique called race directed scheduling that for a
given race candidate searches for an input and a
schedule that lead to the race. The search iterates a
combination of concolic execution and schedule
improvement, and turns out to find useful inputs and
schedules efficiently. We use an existing technique to
produce a manageable number of race candidates. Our
experiments on 23 Java programs found 72 real races
that were missed by the best existing dynamic
techniques. Among those 72 races, 31 races were found
with schedules that have between 1 million and 108
million events, which suggests that they are rare and
hard-to-find races.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Rubin:2014:HCW,
author = "Norm Rubin",
title = "Heterogeneous computing: what does it mean for
compiler research?",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "315--316",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2558891",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The current trend in computer architecture is to
increase the number of cores, to create specialized
types of cores within a single machine, and to network
such machines together in very fluid web/cloud
computing arrangements. Compilers have traditionally
focused on code optimizations that improve
performance, but is that the right target for speeding
up real applications? Consider loading a web page
(like starting Gmail): the page is transferred to the
client, any JavaScript is compiled, the JavaScript
executes, and the page gets displayed. The classic
compiler model (first developed in the late 1950s) was
a great fit for single-core machines but has fallen
behind both architecture and language. For example,
how do you compile a single program for a machine that
has both a CPU and a graphics coprocessor (a GPU) with
a very different programming and memory model?
Together with the changes in architecture there have
been changes in programming languages: dynamic
languages are used more, static languages less. How
does this affect compiler research? In this talk, I'll
review a number of traditional compiler research
challenges that have become (or will become) burning
issues and describe some new problem areas that were
not considered in the past. For example, language
specifications are large, complex technical documents
that are difficult for non-experts to follow.
Application programmers are often unwilling to read
these documents; can a compiler bridge the gap?",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Natarajan:2014:FCL,
author = "Aravind Natarajan and Neeraj Mittal",
title = "Fast concurrent lock-free binary search trees",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "317--328",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555256",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new lock-free algorithm for concurrent
manipulation of a binary search tree in an asynchronous
shared memory system that supports search, insert and
delete operations. In addition to read and write
instructions, our algorithm uses (single-word)
compare-and-swap (CAS) and bit-test-and-set (SETB)
atomic instructions, both of which are commonly
supported by many modern processors including Intel~64
and AMD64. In contrast to existing lock-free algorithms
for a binary search tree, our algorithm is based on
marking edges rather than nodes. As a result, when
compared to other lock-free algorithms, modify (insert
and delete) operations in our algorithm work on a
smaller portion of the tree, thereby reducing
conflicts, and execute fewer atomic instructions (one
for insert and three for delete). Our experiments
indicate that our lock-free algorithm significantly
outperforms all other algorithms for a concurrent
binary search tree in many cases, especially when
contention is high, by as much as 100\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
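The full marked-edge tree algorithm is too long to reproduce here,
but the single-word CAS retry pattern it is built from (one CAS per
insert, three per delete, per the abstract) can be shown compactly.
The Java sketch below illustrates only that underlying primitive, on
a Treiber-style lock-free stack rather than the authors' tree; all
names are ours.

    import java.util.concurrent.atomic.AtomicReference;

    // Illustration of the single-word CAS retry pattern that lock-free
    // structures such as the marked-edge BST are built from. This is a
    // Treiber-style stack, not the tree algorithm from the paper.
    final class TreiberStack<T> {
        private static final class Node<U> {
            final U item;
            Node<U> next;
            Node(U item) { this.item = item; }
        }

        private final AtomicReference<Node<T>> top = new AtomicReference<>();

        public void push(T item) {
            Node<T> n = new Node<>(item);
            while (true) {                      // retry until the CAS wins
                Node<T> t = top.get();
                n.next = t;                     // prepare the update privately
                if (top.compareAndSet(t, n)) {  // publish with one CAS
                    return;
                }
            }
        }

        public T pop() {
            while (true) {
                Node<T> t = top.get();
                if (t == null) return null;     // empty
                if (top.compareAndSet(t, t.next)) {
                    return t.item;
                }
            }
        }
    }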
@Article{Brown:2014:GTN,
author = "Trevor Brown and Faith Ellen and Eric Ruppert",
title = "A general technique for non-blocking trees",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "329--342",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555267",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a general technique for obtaining provably
correct, non-blocking implementations of a large class
of tree data structures where pointers are directed
from parents to children. Updates are permitted to
modify any contiguous portion of the tree atomically.
Our non-blocking algorithms make use of the LLX, SCX
and VLX primitives, which are multi-word
generalizations of the standard LL, SC and VL
primitives and have been implemented from single-word
CAS. To illustrate our technique, we describe how it
can be used in a fairly straightforward way to obtain a
non-blocking implementation of a chromatic tree, which
is a relaxed variant of a red-black tree. The height of
the tree at any time is O(c + log n), where n is the
number of keys and c is the number of updates in
progress. We provide an experimental performance
analysis which demonstrates that our Java
implementation of a chromatic tree rivals, and often
significantly outperforms, other leading concurrent
dictionaries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Drachsler:2014:PCB,
author = "Dana Drachsler and Martin Vechev and Eran Yahav",
title = "Practical concurrent binary search trees via logical
ordering",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "343--356",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555269",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present practical, concurrent binary search tree
(BST) algorithms that explicitly maintain logical
ordering information in the data structure, permitting
clean separation from its physical tree layout. We
capture logical ordering using intervals, with the
property that an item belongs to the tree if and only
if the item is an endpoint of some interval. We are
thus able to construct efficient, synchronization-free
and intuitive lookup operations. We present (i) a
concurrent non-balanced BST with a lock-free lookup,
and (ii) a concurrent AVL tree with a lock-free lookup
that requires no synchronization with any mutating
operations, including balancing operations. Our
algorithms apply on-time deletion; that is, every
request for removal of a node results in its immediate
removal from the tree. This new feature did not exist
in previous concurrent internal tree algorithms. We
implemented our concurrent BST algorithms and evaluated
them against several state-of-the-art concurrent tree
algorithms. Our experimental results show that our
algorithms with lock-free contains and on-time deletion
are practical and often comparable to the
state-of-the-art.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Timnat:2014:PWF,
author = "Shahar Timnat and Erez Petrank",
title = "A practical wait-free simulation for lock-free data
structures",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "357--368",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555261",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Lock-free data structures guarantee overall system
progress, whereas wait-free data structures guarantee
the progress of each and every thread, providing the
desirable non-starvation guarantee for concurrent data
structures. While practical lock-free implementations
are known for various data structures, wait-free data
structure designs are rare. Wait-free implementations
have been notoriously hard to design and often
inefficient. In this work we present a transformation
of lock-free algorithms to wait-free ones allowing even
a non-expert to transform a lock-free data-structure
into a practical wait-free one. The transformation
requires that the lock-free data structure be given in
the normalized form defined in this work. Using the new
method, we have designed and implemented a wait-free
linked list, skip list, and tree, and we measured their
performance. It turns out that for all these data
structures the wait-free implementations are only a few
percent slower than their lock-free counterparts, while
still guaranteeing non-starvation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Pusukuri:2014:LCA,
author = "Kishore Kumar Pusukuri and Rajiv Gupta and Laxmi
Narayan Bhuyan",
title = "Lock contention aware thread migrations",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "369--370",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555273",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "On a cache-coherent multicore multiprocessor system,
the performance of a multithreaded application with
high lock contention is very sensitive to the
distribution of application threads across multiple
processors. This is because the distribution of threads
impacts the frequency of lock transfers between
processors, which in turn impacts the frequency of
last-level cache (LLC) misses that lie on the critical
path of execution. Inappropriate distribution of
threads across processors increases LLC misses in the
critical path and significantly degrades performance of
multithreaded programs. To alleviate the above problem,
this paper overviews a thread migration technique,
which migrates threads of a multithreaded program
across multicore processors so that threads seeking
locks are more likely to find the locks on the same
processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Lee:2014:IFL,
author = "Kyu Hyung Lee and Dohyeong Kim and Xiangyu Zhang",
title = "Infrastructure-free logging and replay of concurrent
execution on multiple cores",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "371--372",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555274",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We develop a logging and replay technique for real
concurrent execution on multiple cores. Our technique
directly works on binaries and does not require any
hardware or complex software infrastructure support. We
focus on minimizing logging overhead: our technique
logs only a subset of system calls and thread spawns.
Replay is on
a single core. During replay, our technique first tries
to follow only the event order in the log. However, due
to schedule differences, replay may fail. An
exploration process is then triggered to search for a
schedule that allows the replay to make progress.
Exploration is performed within a window preceding the
point of replay failure. During exploration, our
technique first tries to reorder synchronized blocks.
If that does not lead to progress, it further reorders
shared variable accesses. The exploration is
facilitated by a sophisticated caching mechanism. Our
experiments on real world programs and real workload
show that the proposed technique has very low logging
overhead (2.6\% on average) and fast schedule
reconstruction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Aguston:2014:PHC,
author = "Cfir Aguston and Yosi Ben Asher and Gadi Haber",
title = "Parallelization hints via code skeletonization",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "373--374",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555275",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Tools that provide optimization hints for program
developers face severe obstacles and are often unable
to provide meaningful guidance on how to parallelize
real-life applications. The main reason is the high
complexity and large size of commercially valuable
code. Such code is
often rich with pointers, heavily nested conditional
statements, nested while-based loops, function calls,
etc. These constructs prevent existing compiler
analysis from extracting the full parallelization
potential. We propose a new paradigm to overcome this
issue by automatically transforming the code into a
much simpler skeleton-like form that is more conducive
to auto-parallelization. We then apply existing tools
of source-level automatic parallelization on the
skeletonized code in order to expose possible
parallelization patterns. The skeleton code, along with
the parallelized version, is then provided to the
programmer in the form of an IDE (Integrated
Development Environment) recommendation. The proposed
skeletonization algorithm replaces pointers by integer
indexes and C-struct references by references to
multi-dimensional arrays. This is because automatic
parallelizers cannot handle pointer expressions. For
example, {\tt while(p != NULL)\{ p->val++; p=p->next;
\}} will be skeletonized to the parallelizable {\tt
for(Ip=0;Ip < N; Ip++) \{ Aval[Ip]++; \}} where {\tt
Aval[]} holds the embedding of the original list. It
follows that the main goal of the skeletonization
process is to embed pointer-based data structures into
arrays. Though the skeletonized code is not
semantically equivalent to the original code, it points
out a possible parallelization pattern for this code
segment and can be used as an effective parallelization
hint to the programmer. We applied the method on
several representative benchmarks from SPEC CPU 2000
and reached up to 80\% performance gain after several
sequential code segments had been manually parallelized
based on the parallelization patterns of the generated
skeletons. In a different set of experiments we tried
to estimate the potential of skeletonization for a
larger set of programs in SPEC 2000 and obtained an
estimation of 27\% additional loops that can be
parallelized/vectorized due to skeletonization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
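The C fragment quoted in the abstract translates directly into other
languages. As a hedged illustration, the Java pair below shows the
same before/after: a pointer-chasing loop whose loop-carried
dependence hides the parallelism, and its skeletonized array form,
which a parallelizer (here a parallel stream, standing in for the
source-level tools the paper uses) handles easily. The embedding step
that copies the list into the array is assumed.

    import java.util.stream.IntStream;

    final class SkeletonizationDemo {
        static final class ListNode {
            int val;
            ListNode next;
        }

        // Original form: the dependence through 'next' prevents
        // automatic parallelization.
        static void bumpAll(ListNode p) {
            while (p != null) {
                p.val++;
                p = p.next;
            }
        }

        // Skeletonized form: the list is assumed to have been embedded
        // into aval[], so iterations are independent and trivially
        // parallel.
        static void bumpAllSkeletonized(int[] aval) {
            IntStream.range(0, aval.length).parallel().forEach(i -> aval[i]++);
        }
    }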
@Article{Wang:2014:CBL,
author = "Wenwen Wang and Chenggang Wu and Pen-Chung Yew and
Xiang Yuan and Zhenjiang Wang and Jianjun Li and
Xiaobing Feng",
title = "Concurrency bug localization using shared memory
access pairs",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "375--376",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555276",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Non-determinism in concurrent programs makes their
debugging much more challenging than that in sequential
programs. To mitigate such difficulties, we propose a
new technique to automatically locate buggy shared
memory accesses that triggered concurrency bugs.
Compared to existing fault localization techniques that
are based on empirical statistical approaches, this
technique has two advantages. First, as long as enough
successful runs of a concurrent program are collected,
the proposed technique can locate buggy memory accesses
to shared data even with only a single captured failed
run, as opposed to the multiple failed runs that other
statistical approaches require. Second,
the proposed technique is more precise because it
considers memory accesses in those failed runs that
terminate prematurely.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Leung:2014:TMS,
author = "Vitus J. Leung and David P. Bunde and Jonathan Ebbers
and Stefan P. Feer and Nickolas W. Price and Zachary D.
Rhodes and Matthew Swank",
title = "Task mapping stencil computations for non-contiguous
allocations",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "377--378",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555277",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We examine task mapping algorithms for systems that
allocate jobs non-contiguously. Several studies have
shown that task placement affects job running time. We
focus on jobs with a stencil communication pattern and
use experiments on a Cray XE to evaluate novel task
mapping algorithms as well as some adapted to this
setting. This is done with the miniGhost miniApp, which
mimics the performance of CTH, a shock physics
application. Our strategies improve average and
single-run times by as much as 28\% and 36\% over a
baseline strategy, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Wimmer:2014:DST,
author = "Martin Wimmer and Francesco Versaci and Jesper Larsson
Tr{\"a}ff and Daniel Cederman and Philippas Tsigas",
title = "Data structures for task-based priority scheduling",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "379--380",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555278",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present three lock-free data structures for
priority task scheduling: a priority work-stealing one,
a centralized one with $ \rho $-relaxed semantics, and a
hybrid one combining both concepts. With the
single-source shortest path (SSSP) problem as example,
we show how the different approaches affect the
prioritization and provide upper bounds on the number
of examined nodes. We argue that priority task
scheduling allows for an intuitive and easy way to
parallelize the SSSP problem, a notoriously hard task.
Experimental evidence supports the good scalability of
the resulting algorithm. The larger aim of this work is
to understand the trade-offs between scalability and
priority guarantees in task scheduling systems. We show
that $ \rho $-relaxation is a valuable technique for
improving the former, while still allowing semantic
constraints to be satisfied: the lock-free, hybrid
$k$-priority data structure can scale as well as
work-stealing, while still providing strong priority
scheduling guarantees, which depend on the parameter $k$.
Our theoretical results open up possibilities for even
more scalable data structures by adopting a weaker form
of $ \rho $-relaxation, which still enables the semantic
constraints to be respected.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
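As a rough illustration of what relaxed priority semantics buy, the
hedged Java sketch below spreads tasks across k independent priority
queues and removes from a randomly chosen one; the removed task is
then only guaranteed to be among the per-queue minima rather than the
global minimum. This is our simplification of the relaxation idea,
not the paper's lock-free structures or its k-priority bounds.

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.PriorityBlockingQueue;
    import java.util.concurrent.ThreadLocalRandom;

    // Sketch of relaxed priority scheduling: k independent priority
    // queues; insert and remove pick a queue at random. Removal may
    // return a task that is not the global minimum, which is exactly
    // the relaxation being traded for scalability.
    final class RelaxedPriorityPool<T extends Comparable<T>> {
        private final List<PriorityBlockingQueue<T>> queues = new ArrayList<>();

        RelaxedPriorityPool(int k) {
            for (int i = 0; i < k; i++) {
                queues.add(new PriorityBlockingQueue<>());
            }
        }

        void insert(T task) {
            int q = ThreadLocalRandom.current().nextInt(queues.size());
            queues.get(q).add(task);
        }

        // With k queues the answer is only guaranteed to be one of the
        // k per-queue minima, not the globally smallest task.
        T removeSome() {
            int k = queues.size();
            int start = ThreadLocalRandom.current().nextInt(k);
            for (int i = 0; i < k; i++) {
                T t = queues.get((start + i) % k).poll();
                if (t != null) return t;
            }
            return null;  // all queues were empty when inspected
        }
    }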
@Article{Gomez:2014:DSD,
author = "Leonardo Bautista Gomez and Franck Cappello",
title = "Detecting silent data corruption through data dynamic
monitoring for scientific applications",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "381--382",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555279",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parallel programming has become one of the best ways
to express scientific models that simulate a wide range
of natural phenomena. These complex parallel codes are
deployed and executed on large-scale parallel
computers, making them important tools for scientific
discovery. As supercomputers get faster and larger, the
increasing number of components is leading to higher
failure rates. In particular, the miniaturization of
electronic components is expected to lead to a dramatic
rise in soft errors and data corruption. Moreover, soft
errors can corrupt data silently and generate large
inaccuracies or wrong results at the end of the
computation. In this paper we propose a novel technique
to detect silent data corruption based on data
monitoring. Using this technique, an application can
learn the normal dynamics of its datasets, allowing it
to quickly spot anomalies. We evaluate our technique
with synthetic benchmarks and show that it can detect
up to 50\% of injected errors while incurring only
negligible overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
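A hedged sketch of the data-monitoring idea as described in the
abstract: learn the typical per-step change of each data point and
flag updates that deviate far from it. The exponential-moving-average
learner and the factor-of-10 threshold below are illustrative
assumptions of ours, not the paper's detector.

    // Sketch of silent-data-corruption detection by monitoring data
    // dynamics: keep a running estimate of how much each value normally
    // changes per timestep and flag updates that exceed it by a wide
    // margin. The EWMA and the threshold factor are illustrative.
    final class SdcMonitor {
        private final double[] prev;          // last accepted value per point
        private final double[] typicalDelta;  // learned per-point dynamic
        private final double alpha = 0.1;     // EWMA smoothing
        private final double factor = 10.0;   // anomaly threshold

        SdcMonitor(double[] initial) {
            prev = initial.clone();
            typicalDelta = new double[initial.length];
            java.util.Arrays.fill(typicalDelta, 1e-12);  // avoid zero estimate
        }

        // Returns true if the update of point i to 'value' looks anomalous.
        boolean checkAndLearn(int i, double value) {
            double delta = Math.abs(value - prev[i]);
            boolean anomalous =
                typicalDelta[i] > 1e-9 && delta > factor * typicalDelta[i];
            if (!anomalous) {  // only learn from updates that look normal
                typicalDelta[i] = (1 - alpha) * typicalDelta[i] + alpha * delta;
                prev[i] = value;
            }
            return anomalous;
        }
    }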
@Article{Sandes:2014:FGP,
author = "Edans F. de O. Sandes and Guillermo Miranda and Alba
C. M. A. Melo and Xavier Martorell and Eduard Ayguade",
title = "Fine-grain parallel megabase sequence comparison with
multiple heterogeneous {GPUs}",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "383--384",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555280",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper proposes and evaluates a parallel strategy
to execute the exact Smith-Waterman (SW) algorithm for
megabase DNA sequences in heterogeneous multi-GPU
platforms. In our strategy, the computation of a single
huge SW matrix is spread over multiple GPUs, which
communicate border elements to their neighbours, using a
circular buffer mechanism that hides the communication
overhead. We compared 4 pairs of homologous
human-chimpanzee chromosomes using 2 different GPU
environments, obtaining a performance of up to 140.36
GCUPS (billions of cell updates per second) with 3
heterogeneous GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
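The recurrence being partitioned across GPUs is the standard
Smith-Waterman local-alignment dynamic program, sketched below in
plain sequential Java (linear gap penalty, illustrative scores). Only
the recurrence is shown; the paper's contribution (spreading the
matrix over GPUs and exchanging border cells through circular
buffers) is omitted, and a dense matrix like this is of course
infeasible at megabase scale.

    // Standard Smith-Waterman recurrence (sequential, linear gap
    // penalty; match/mismatch/gap scores are illustrative choices).
    final class SmithWaterman {
        static int score(String a, String b) {
            final int match = 2, mismatch = -1, gap = -1;
            int[][] h = new int[a.length() + 1][b.length() + 1];
            int best = 0;
            for (int i = 1; i <= a.length(); i++) {
                for (int j = 1; j <= b.length(); j++) {
                    int s = (a.charAt(i - 1) == b.charAt(j - 1)) ? match : mismatch;
                    int v = Math.max(0, h[i - 1][j - 1] + s);  // local: floor at 0
                    v = Math.max(v, h[i - 1][j] + gap);
                    v = Math.max(v, h[i][j - 1] + gap);
                    h[i][j] = v;
                    best = Math.max(best, v);
                }
            }
            return best;  // best local alignment score
        }
    }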
@Article{Golan-Gueta:2014:ASL,
author = "Guy Golan-Gueta and G. Ramalingam and Mooly Sagiv and
Eran Yahav",
title = "Automatic semantic locking",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "385--386",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555281",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we consider concurrent programs in
which the shared state consists of instances of
linearizable ADTs (abstract data types). We develop a
novel automated approach to concurrency control that
addresses a common need: the need to atomically execute
a code fragment, which may contain multiple ADT
operations on multiple ADT instances. In our approach,
each ADT implements ADT-specific semantic locking
operations that serve to exploit the semantics of ADT
operations. We develop a synthesis algorithm that
automatically inserts calls to these locking operations
in a set of given code fragments (in a client program)
to ensure that these code fragments execute atomically
without deadlocks, and without rollbacks. We have
implemented the synthesis algorithm and several
general-purpose ADTs with semantic locking. We have
applied the synthesis algorithm to several Java
programs that use these ADTs. Our results show that our
approach enables efficient and scalable
synchronization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
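One simple instance of the idea: for a map ADT, operations on
distinct keys commute, so a code fragment touching a known set of
keys needs only those keys' locks, acquired in a canonical order so
that fragments cannot deadlock. The striped-lock Java sketch below is
our illustration of such an ADT-specific locking operation, not the
paper's synthesis algorithm.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.locks.ReentrantLock;

    // Semantic locking for a map ADT: put/get on different keys commute,
    // so a fragment touching keys {k1,...,kn} acquires only those keys'
    // (striped) locks, in sorted order to rule out deadlock.
    final class SemanticallyLockedMap<V> {
        private final Map<Integer, V> map = new HashMap<>();
        private final ReentrantLock[] stripes = new ReentrantLock[64];

        SemanticallyLockedMap() {
            for (int i = 0; i < stripes.length; i++) {
                stripes[i] = new ReentrantLock();
            }
        }

        private int stripeOf(int key) {
            return Math.floorMod(key, stripes.length);
        }

        // Atomically execute a fragment that touches exactly 'keys'.
        void atomically(int[] keys, Runnable fragment) {
            int[] order = Arrays.stream(keys)
                                .map(this::stripeOf)
                                .distinct().sorted().toArray();
            for (int s : order) stripes[s].lock();  // canonical order
            try {
                fragment.run();
            } finally {
                for (int i = order.length - 1; i >= 0; i--) {
                    stripes[order[i]].unlock();
                }
            }
        }

        // Meant to be called from inside an 'atomically' fragment.
        V get(int key) { return map.get(key); }
        void put(int key, V value) { map.put(key, value); }
    }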
@Article{Hassan:2014:OTB,
author = "Ahmed Hassan and Roberto Palmieri and Binoy
Ravindran",
title = "Optimistic transactional boosting",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "387--388",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555283",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Herlihy and Koskinen's transactional boosting
methodology addressed the challenge of converting
concurrent data structures into transactional ones. We
present an optimistic methodology for boosting
concurrent collections. Optimistic boosting allows
greater data structure-specific optimizations, easier
integration with STM frameworks, and lower restrictions
on the boosted operations than the original boosting
methodology.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Agrawal:2014:PGS,
author = "Kunal Agrawal and Jeremy T. Fineman and Brendan
Sheridan and Jim Sukha and Robert Utterback",
title = "Provably good scheduling for parallel programs that
use data structures through implicit batching",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "389--390",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555284",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This poster proposes an efficient runtime scheduler
that provides provable performance guarantees to
parallel programs that use data structures through the
use of implicit batching.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
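The poster abstract does not detail the mechanism, but the flavor of
implicit batching can be conveyed with a flat-combining-style sketch
(our choice of technique, not necessarily the authors' scheduler):
threads enqueue operations, and whichever thread acquires the
combiner lock applies the accumulated batch to an otherwise
sequential data structure.

    import java.util.concurrent.ConcurrentLinkedQueue;
    import java.util.concurrent.locks.ReentrantLock;

    // Flat-combining-style sketch of implicit batching: operations are
    // enqueued, and one thread at a time drains and applies the batch.
    // A real scheduler would also let submitters wait for results.
    final class ImplicitBatcher {
        private final ConcurrentLinkedQueue<Runnable> pending =
            new ConcurrentLinkedQueue<>();
        private final ReentrantLock combiner = new ReentrantLock();

        void submit(Runnable op) {
            pending.add(op);
            // Re-check after unlocking so no enqueued op is stranded.
            while (!pending.isEmpty() && combiner.tryLock()) {
                try {
                    Runnable r;
                    while ((r = pending.poll()) != null) {
                        r.run();  // apply the batch sequentially
                    }
                } finally {
                    combiner.unlock();
                }
            }
        }
    }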
@Article{Ma:2014:TAC,
author = "Lin Ma and Kunal Agrawal and Roger D. Chamberlain",
title = "Theoretical analysis of classic algorithms on
highly-threaded many-core {GPUs}",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "391--392",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555285",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Threaded many-core memory (TMM) model provides a
framework to analyze the performance of algorithms on
GPUs. Here, we investigate the effectiveness of the TMM
model by analyzing algorithms for 3 classic problems
--- suffix tree/array for string matching, fast Fourier
transform, and merge sort --- under this model. Our
findings indicate that the TMM model can explain and
predict previously unexplained trends and artifacts in
experimental data.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Tomkins:2014:SIP,
author = "Daniel Tomkins and Timmie Smith and Nancy M. Amato and
Lawrence Rauchwerger",
title = "{SCCMulti}: an improved parallel strongly connected
components algorithm",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "393--394",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555286",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Tarjan's famous linear time, sequential algorithm for
finding the strongly connected components (SCCs) of a
graph relies on depth first search, which is inherently
sequential. Deterministic parallel algorithms solve
this problem in logarithmic time using matrix
multiplication techniques, but matrix multiplication
requires a large amount of total work. Randomized
algorithms based on reachability --- the ability to get
from one vertex to another along a directed path ---
greatly improve the work bound in the average case.
However, these algorithms do not always perform well;
for instance, Divide-and-Conquer Strong Components
(DCSC), a scalable, divide-and-conquer algorithm, has
good expected theoretical limits, but can perform very
poorly on graphs for which the maximum reachability of
any vertex is small. A related algorithm, MultiPivot,
gives very high probability guarantees on the total
amount of work for all graphs, but this improvement
introduces an overhead that increases the average
running time. This work introduces SCCMulti, a
multi-pivot improvement of DCSC that offers the same
consistency as MultiPivot without the time overhead. We
provide experimental results demonstrating SCCMulti's
scalability; these results also show that SCCMulti is
more consistent than DCSC and is always faster than
MultiPivot.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
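For readers unfamiliar with DCSC, the reachability-based
divide-and-conquer scheme that it (and SCCMulti) parallelize can be
sketched sequentially: pick a pivot, compute its forward and backward
reachable sets, output their intersection as the pivot's SCC, and
recurse independently on the three remaining regions. The Java below
is a hedged sequential sketch with a single pivot, not the
multi-pivot parallel algorithm.

    import java.util.ArrayDeque;
    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Sequential sketch of reachability-based divide-and-conquer SCC.
    // adj/radj are forward and reverse adjacency lists over vertex ids.
    final class DivideAndConquerScc {
        static List<Set<Integer>> sccs(List<List<Integer>> adj,
                                       List<List<Integer>> radj) {
            List<Set<Integer>> out = new ArrayList<>();
            Set<Integer> all = new HashSet<>();
            for (int v = 0; v < adj.size(); v++) all.add(v);
            recurse(adj, radj, all, out);
            return out;
        }

        private static void recurse(List<List<Integer>> adj,
                                    List<List<Integer>> radj,
                                    Set<Integer> vs, List<Set<Integer>> out) {
            if (vs.isEmpty()) return;
            int pivot = vs.iterator().next();
            Set<Integer> fwd = reach(adj, pivot, vs);
            Set<Integer> bwd = reach(radj, pivot, vs);
            Set<Integer> scc = new HashSet<>(fwd);
            scc.retainAll(bwd);  // the SCC containing the pivot
            out.add(scc);
            // No SCC crosses the three remaining regions, so they can be
            // processed independently (in parallel, in DCSC).
            recurse(adj, radj, minus(fwd, scc), out);
            recurse(adj, radj, minus(bwd, scc), out);
            Set<Integer> rest = new HashSet<>(vs);
            rest.removeAll(fwd);
            rest.removeAll(bwd);
            recurse(adj, radj, rest, out);
        }

        private static Set<Integer> minus(Set<Integer> a, Set<Integer> b) {
            Set<Integer> r = new HashSet<>(a);
            r.removeAll(b);
            return r;
        }

        // BFS restricted to the vertex set 'vs'.
        private static Set<Integer> reach(List<List<Integer>> g, int src,
                                          Set<Integer> vs) {
            Set<Integer> seen = new HashSet<>();
            ArrayDeque<Integer> work = new ArrayDeque<>();
            seen.add(src);
            work.add(src);
            while (!work.isEmpty()) {
                int u = work.poll();
                for (int w : g.get(u)) {
                    if (vs.contains(w) && seen.add(w)) work.add(w);
                }
            }
            return seen;
        }
    }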
@Article{Luo:2014:ISM,
author = "Miao Luo and Xiaoyi Lu and Khaled Hamidouche and
Krishna Kandalla and Dhabaleswar K. Panda",
title = "Initial study of multi-endpoint runtime for {MPI +
OpenMP} hybrid programming model on multi-core
systems",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "395--396",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555287",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "State-of-the-art MPI libraries rely on locks to
guarantee thread-safety. This discourages application
developers from using multiple threads to perform MPI
operations. In this paper, we propose a high
performance, lock-free multi-endpoint MPI runtime,
which can achieve up to 40\% improvement for
point-to-point operation and one representative
collective operation with minimal or no modifications
to existing applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Isaacs:2014:ELS,
author = "Katherine E. Isaacs and Todd Gamblin and Abhinav
Bhatele and Peer-Timo Bremer and Martin Schulz and
Bernd Hamann",
title = "Extracting logical structure and identifying
stragglers in parallel execution traces",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "397--398",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555288",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a new approach to automatically extract
an idealized logical structure from a parallel
execution trace. We use this structure to define
intuitive metrics such as the lateness of a process
involved in a parallel execution. By analyzing and
illustrating traces in terms of logical steps, we
leverage a developer's understanding of the
happened-before relations in a parallel program. This
technique can uncover dependency chains, elucidate
communication patterns, and highlight sources and
propagation of delays, all of which may be obscured in
a traditional trace visualization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Fisher:2014:UFM,
author = "Kathleen Fisher",
title = "Using formal methods to enable more secure vehicles:
{DARPA}'s {HACMS} program",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "1--1",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628165",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Networked embedded systems are ubiquitous in modern
society. Examples include SCADA systems that manage
physical infrastructure, medical devices such as
pacemakers and insulin pumps, and vehicles such as
airplanes and automobiles. Such devices are connected
to networks for a variety of compelling reasons,
including the ability to access diagnostic information
conveniently, perform software updates, provide
innovative features, and lower costs. Researchers and
hackers have shown that these kinds of networked
embedded systems are vulnerable to remote attacks and
that such attacks can cause physical damage and can be
hidden from monitors [1, 4]. DARPA launched the HACMS
program to create technology to make such systems
dramatically harder to attack successfully.
Specifically, HACMS is pursuing a clean-slate, formal
methods-based approach to the creation of
high-assurance vehicles, where high assurance is
defined to mean functionally correct and satisfying
appropriate safety and security properties. Specific
technologies include program synthesis, domain-specific
languages, and theorem provers used as program
development environments. Targeted software includes
operating system components such as hypervisors,
microkernels, file systems, and device drivers as well
as control systems such as autopilots and adaptive
cruise controls. Program researchers are leveraging
existing high-assurance software including NICTA's seL4
microkernel and INRIA's CompCert compiler. Although the
HACMS project is less than halfway done, the program
has already achieved some remarkable success. At
program kick-off, a Red Team easily hijacked the
baseline open-source quadcopter that HACMS researchers
are using as a research platform. At the end of
eighteen months, the Red Team was not able to hijack
the newly-minted ``SMACCMCopter'' running
high-assurance HACMS code, despite being given six
weeks and full access to the source code of the copter.
An expert in penetration testing called the
SMACCMCopter ``the most secure UAV on the planet.'' In
this talk, I will describe the HACMS program: its
motivation, the underlying technologies, current
results, and future directions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Hickey:2014:BES,
author = "Patrick C. Hickey and Lee Pike and Trevor Elliott and
James Bielman and John Launchbury",
title = "Building embedded systems with embedded {DSLs}",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "3--9",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628146",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We report on our experiences in synthesizing a
fully-featured autopilot from embedded domain-specific
languages (EDSLs) hosted in Haskell. The autopilot is
approximately 50k lines of C code generated from 10k
lines of EDSL code and includes control laws, mode
logic, an encrypted communications system, and device
drivers. The autopilot was built in less than two
engineer years. This is the story of how EDSLs provided
the productivity and safety gains to do large-scale
low-level embedded programming, and of the lessons we
learned in doing so.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Schlesinger:2014:CNP,
author = "Cole Schlesinger and Michael Greenberg and David
Walker",
title = "Concurrent {NetCore}: from policies to pipelines",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "11--24",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628157",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In a Software-Defined Network (SDN), a central,
computationally powerful controller manages a set of
distributed, computationally simple switches. The
controller computes a policy describing how each switch
should route packets and populates packet-processing
tables on each switch with rules to enact the routing
policy. As network conditions change, the controller
continues to add and remove rules from switches to
adjust the policy as needed. Recently, the SDN
landscape has begun to change as several proposals for
new, reconfigurable switching architectures, such as
RMT [5] and FlexPipe [14] have emerged. These platforms
provide switch programmers with many flexible tables
for storing packet-processing rules, and they offer
programmers control over the packet fields that each
table can analyze and act on. These reconfigurable
switch architectures support a richer SDN model in
which a switch configuration phase precedes the rule
population phase [4]. In the configuration phase, the
controller sends the switch a graph describing the
layout and capabilities of the packet processing tables
it will require during the population phase. Armed with
this foreknowledge, the switch can allocate its
hardware (or software) resources more efficiently. We
present a new, typed language, called Concurrent
NetCore, for specifying routing policies and graphs of
packet-processing tables. Concurrent NetCore includes
features for specifying sequential, conditional and
concurrent control-flow between packet-processing
tables. We develop a fine-grained operational model for
the language and prove this model coincides with a
higher-level denotational model when programs are
well-typed. We also prove several additional properties
of well-typed programs, including strong normalization
and determinism. To illustrate the utility of the
language, we develop linguistic models of both the RMT
and FlexPipe architectures and we give a multi-pass
compilation algorithm that translates graphs and
routing policies to the RMT model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Schoepe:2014:STI,
author = "Daniel Schoepe and Daniel Hedin and Andrei Sabelfeld",
title = "{SeLINQ}: tracking information across
application-database boundaries",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "25--38",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628151",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "The root cause for confidentiality and integrity
attacks against computing systems is insecure
information flow. The complexity of modern systems
poses a major challenge to secure end-to-end
information flow, ensuring that the insecurity of a
single component does not render the entire system
insecure. While information flow in a variety of
languages and settings has been thoroughly studied in
isolation, the problem of tracking information across
component boundaries has been largely out of reach of
the work so far. This is unsatisfactory because
tracking information across component boundaries is
necessary for end-to-end security. This paper proposes
a framework for uniform tracking of information flow
through both the application and the underlying
database. The key enabler of the uniform treatment is
recent work by Cheney et al., which studies database
manipulation via an embedded language-integrated query
language (with Microsoft's LINQ on the backend).
Because both the host language and the embedded query
languages are functional F\#-like languages, we are
able to leverage information-flow enforcement for
functional languages to obtain information-flow control
for databases ``for free'', synergize it with
information-flow control for applications and thus
guarantee security across application-database
boundaries. We develop the formal results in the form
of a security type system that includes a treatment of
algebraic data types and pattern matching, and
establish its soundness. On the practical side, we
implement the framework and demonstrate its usefulness
in a case study with a realistic movie rental
database.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Chen:2014:TBP,
author = "Sheng Chen and Martin Erwig",
title = "Type-based parametric analysis of program families",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "39--51",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628155",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Previous research on static analysis for program
families has focused on lifting analyses for single,
plain programs to program families by employing
idiosyncratic representations. The lifting effort
typically involves a significant amount of work for
proving the correctness of the lifted algorithm and
demonstrating its scalability. In this paper, we
propose a parameterized static analysis framework for
program families that can automatically lift a class of
type-based static analyses for plain programs to
program families. The framework consists of a
parametric logical specification and a parametric
variational constraint solver. We prove that a lifted
algorithm is correct provided that the underlying
analysis algorithm is correct. An evaluation of our
framework has revealed an error in a previous manually
lifted analysis. Moreover, performance tests indicate
that the overhead incurred by the general framework is
bounded by a factor of 2.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Stansifer:2014:RSM,
author = "Paul Stansifer and Mitchell Wand",
title = "{Romeo}: a system for more flexible binding-safe
programming",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "53--65",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628162",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Current languages for safely manipulating values with
names only support term languages with simple binding
syntax. As a result, no tools exist to safely
manipulate code written in those languages for which
name problems are the most challenging. We address this
problem with Romeo, a language that respects $ \alpha
$-equivalence on its values, and which has access to a
rich specification language for binding, inspired by
attribute grammars. Our work has the complex-binding
support of David Herman's $ \lambda_m$, but is a
full-fledged binding-safe language like Pure FreshML.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Grabmayer:2014:MSL,
author = "Clemens Grabmayer and Jan Rochel",
title = "Maximal sharing in the {Lambda} calculus with letrec",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "67--80",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628148",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Increasing sharing in programs is desirable to
compactify the code, and to avoid duplication of
reduction work at run-time, thereby speeding up
execution. We show how a maximal degree of sharing can
be obtained for programs expressed as terms in the
lambda calculus with letrec. We introduce a notion of
`maximal compactness' for $ \lambda_{\rm letrec}$-terms
among all terms with the same infinite unfolding.
Instead of being defined purely syntactically, this notion is
based on a graph semantics. $ \lambda_{\rm
letrec}$-terms are interpreted as first-order term
graphs so that unfolding equivalence between terms is
preserved and reflected through bisimilarity of the
term graph interpretations. Compactness of the term
graphs can then be compared via functional
bisimulation. We describe practical and efficient
methods for the following two problems: transforming a
$ \lambda_{\rm letrec}$-term into a maximally compact
form; and deciding whether two $ \lambda_{\rm
letrec}$-terms are unfolding-equivalent. The
transformation of a $ \lambda_{\rm letrec}$-term $L$
into maximally compact form $ L_0$ proceeds in three
steps: (i) translate $L$ into its term graph $ G =
[[L]]$; (ii) compute the maximally shared form of $G$
as its bisimulation collapse $ G_0$; (iii) read back a
$ \lambda_{\rm letrec}$-term $ L_0$ from the term graph
$ G_0$ with the property $ [[L_0]] = G_0$. Then $ L_0$
represents a maximally shared term graph, and it has
the same unfolding as $L$. The procedure for deciding
whether two given $ \lambda_{\rm letrec}$-terms $ L_1$
and $ L_2$ are unfolding-equivalent computes their term
graph interpretations $ [[L_1]]$ and $ [[L_2]]$, and
checks whether these are bisimilar. For illustration,
we also provide a readily usable implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
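For acyclic terms (no letrec, hence no cycles in the term graph), the
bisimulation collapse of step (ii) degenerates to hash-consing:
structurally identical subterms are represented by a single shared
node. The Java sketch below illustrates that special case under our
own naming; cyclic term graphs need the genuine partition-refinement
collapse the paper describes.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Objects;

    // Hash-consing, the acyclic special case of maximal sharing: two
    // structurally identical subterms map to the same shared node.
    final class HashConser {
        static final class Node {
            final String label;
            final Node left, right;  // null for leaves / unary nodes
            Node(String label, Node left, Node right) {
                this.label = label; this.left = left; this.right = right;
            }
        }

        private static final class Key {
            final String label; final Node left, right;
            Key(String label, Node left, Node right) {
                this.label = label; this.left = left; this.right = right;
            }
            public boolean equals(Object o) {
                if (!(o instanceof Key)) return false;
                Key k = (Key) o;
                // children are already shared, so reference equality suffices
                return label.equals(k.label) && left == k.left && right == k.right;
            }
            public int hashCode() {
                return Objects.hash(label, System.identityHashCode(left),
                                    System.identityHashCode(right));
            }
        }

        private final Map<Key, Node> table = new HashMap<>();

        // Returns the unique shared node for (label, left, right); the
        // children must themselves have been built through mk().
        Node mk(String label, Node left, Node right) {
            return table.computeIfAbsent(new Key(label, left, right),
                                         k -> new Node(label, left, right));
        }
    }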
@Article{Bergstrom:2014:PEH,
author = "Lars Bergstrom and Matthew Fluet and Matthew Le and
John Reppy and Nora Sandler",
title = "Practical and effective higher-order optimizations",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "81--93",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628153",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Inlining is an optimization that replaces a call to a
function with that function's body. This optimization
not only reduces the overhead of a function call, but
can expose additional optimization opportunities to the
compiler, such as removing redundant operations or
unused conditional branches. Another optimization, copy
propagation, replaces a redundant copy of a still-live
variable with the original. Copy propagation can reduce
the total number of live variables, reducing register
pressure and memory usage, and possibly eliminating
redundant memory-to-memory copies. In practice, both of
these optimizations are implemented in nearly every
modern compiler. These two optimizations are practical
to implement and effective in first-order languages,
but in languages with lexically-scoped first-class
functions (a.k.a. closures), these optimizations are not
available to code programmed in a higher-order style.
With higher-order functions, the analysis challenge has
been that the environment at the call site must be the
same as at the closure capture location, up to the free
variables, or the meaning of the program may change.
Olin Shivers' 1991 dissertation called this family of
optimizations super $ \beta $ and he proposed one
analysis technique, called reflow, to support these
optimizations. Unfortunately, reflow has proven too
expensive to implement in practice. Because these
higher-order optimizations are not available in
functional-language compilers, programmers studiously
avoid uses of higher-order values that cannot be
optimized (particularly in compiler benchmarks). This
paper provides the first practical and effective
technique for super $ \beta $ (higher-order) inlining
and copy propagation, which we call unchanged variable
analysis. We show that this technique is practical by
implementing it in the context of a real compiler for
an ML-family language and showing that the required
analyses have costs below 3\% of the total compilation
time. This technique's effectiveness is shown through a
set of benchmarks and example programs, where this
analysis exposes additional potential optimization
sites.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Hackett:2014:WWM,
author = "Jennifer Hackett and Graham Hutton",
title = "Worker\slash wrapper\slash makes it\slash faster",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "95--107",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628142",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Much research in program optimization has focused on
formal approaches to correctness: proving that the
meaning of programs is preserved by the optimization.
Paradoxically, there has been comparatively little work
on formal approaches to efficiency: proving that the
performance of optimized programs is actually improved.
This paper addresses this problem for a general-purpose
optimization technique, the worker/wrapper
transformation. In particular, we use the call-by-need
variant of improvement theory to establish conditions
under which the worker/wrapper transformation is
formally guaranteed to preserve or improve the time
performance of programs in lazy languages such as
Haskell.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
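
The worker/wrapper transformation at issue can be seen in the standard
list-reversal example (a sketch of the usual presentation, not taken
from the paper):

-- Original definition: quadratic, because (++) retraverses its result.
rev :: [a] -> [a]
rev []       = []
rev (x : xs) = rev xs ++ [x]

-- Worker/wrapper factorisation: the worker threads an accumulator,
-- and the wrapper fixes the accumulator to [].
revWorker :: [a] -> [a] -> [a]
revWorker []       acc = acc
revWorker (x : xs) acc = revWorker xs (x : acc)

rev' :: [a] -> [a]   -- the wrapper; linear time
rev' xs = revWorker xs []

main :: IO ()
main = print (rev [1 .. 5 :: Int], rev' [1 .. 5 :: Int])

The paper's contribution is a formal argument, in call-by-need
improvement theory, for conditions under which rewrites of this shape
are guaranteed not to degrade the time performance of a lazy program.
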
@Article{Downen:2014:CSC,
author = "Paul Downen and Zena M. Ariola",
title = "Compositional semantics for composable continuations:
from abortive to delimited control",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "109--122",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628147",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parigot's $ \lambda \mu $-calculus, a system for
computational reasoning about classical proofs, serves
as a foundation for control operations embodied by
operators like Scheme's callcc. We demonstrate that the
call-by-value theory of the $ \lambda \mu $-calculus
contains a latent theory of delimited control, and that
a known variant of $ \lambda \mu $ which unshackles the
syntax yields a calculus of composable continuations
from the existing constructs and rules for classical
control. To relate to the various formulations of
control effects, and to continuation-passing style, we
use a form of compositional program transformations
which preserves the underlying structure of equational
theories, contexts, and substitution. Finally, we
generalize the call-by-name and call-by-value theories
of the $ \lambda \mu $-calculus by giving a single
parametric theory that encompasses both, allowing us to
generate a call-by-need instance that defines a
calculus of classical and delimited control with lazy
evaluation and sharing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
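
Scheme's callcc, the operator the $ \lambda \mu $-calculus models, has a
direct analogue in Haskell's continuation monad; this small example
(illustrative, not from the paper) shows the abortive behaviour that
distinguishes classical control from the delimited control developed in
the paper:

import Control.Monad (when)
import Control.Monad.Cont (Cont, callCC, runCont)

-- Invoking the captured continuation k aborts the enclosing block:
-- the result is 4 * 6 = 24, and 'return 9' is never reached.
example :: Int
example = runCont body (* 6)
  where
    body = callCC $ \k -> do
      when True (k 4)   -- abortive jump, as with Scheme's callcc
      return 9          -- dead code

main :: IO ()
main = print example    -- 24
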
@Article{Petricek:2014:CCC,
author = "Tomas Petricek and Dominic Orchard and Alan Mycroft",
title = "Coeffects: a calculus of context-dependent
computation",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "123--135",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628160",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The notion of context in functional languages no
longer refers just to variables in scope. Context can
capture additional properties of variables (usage
patterns in linear logics; caching requirements in
dataflow languages) as well as additional resources or
properties of the execution environment (rebindable
resources; platform version in a cross-platform
application). The recently introduced notion of
coeffects captures the latter, whole-context
properties, but it fails to capture fine-grained
per-variable properties. We remedy this by developing a
generalized coeffect system with annotations indexed by
a coeffect shape. By instantiating a concrete shape,
our system captures previously studied flat
(whole-context) coeffects, but also structural
(per-variable) coeffects, making coeffect analyses more
useful. We show that the structural system enjoys
desirable syntactic properties and we give a
categorical semantics using extended notions of indexed
comonad. The examples presented in this paper are based
on analysis of established language features (liveness,
linear logics, dataflow, dynamic scoping) and we argue
that such context-aware properties will also be useful
for future development of languages for increasingly
heterogeneous and distributed platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Findler:2014:BSC,
author = "Robert Bruce Findler",
title = "Behavioral software contracts",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "137--138",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2632855",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmers embrace contracts. They can use the
language they know and love to formulate logical
assertions about the behavior of their programs. They
can use the existing IDE infrastructure to log
contracts, to test, to debug, and to profile their
programs. The keynote presents the challenges and
rewards of supporting contracts in a modern,
full-spectrum programming language. It covers technical
challenges of contracts while demonstrating the
non-technical motivation for contract system design
choices and showing how contracts and contract research
can serve practicing programmers. The remainder of this
article is a literature survey of contract research,
with an emphasis on recent work about higher-order
contracts and blame.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
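
A flavour of the higher-order contracts surveyed here, as a hedged
Haskell sketch (Findler's systems live in Racket; these combinators are
only illustrative):

-- A minimal contract library: a contract monitors a value of its type.
type Contract a = a -> a

-- A flat contract checks a predicate, or signals a violation.
flat :: Show a => String -> (a -> Bool) -> Contract a
flat label p x
  | p x       = x
  | otherwise = error ("contract violation: " ++ label ++ " on " ++ show x)

-- A function contract wraps a function so that its argument and result
-- are monitored at each call. (A real system also tracks *blame*, i.e.
-- which party violated the contract; this sketch simply aborts.)
(~~>) :: Contract a -> Contract b -> Contract (a -> b)
(pre ~~> post) f = post . f . pre

positive :: Contract Int
positive = flat "positive" (> 0)

checkedSqrt :: Int -> Int
checkedSqrt = (positive ~~> positive) (floor . sqrt . fromIntegral)

main :: IO ()
main = print (checkedSqrt 16)   -- 4; checkedSqrt (-1) raises a violation

Assigning blame correctly across higher-order boundaries is precisely
the subtlety that the contract literature cited in this survey works
out.
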
@Article{Nguyen:2014:SCV,
author = "Ph{\'u}c C. Nguyen and Sam Tobin-Hochstadt and David
{Van Horn}",
title = "Soft contract verification",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "139--152",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628156",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Behavioral software contracts are a widely used
mechanism for governing the flow of values between
components. However, run-time monitoring and
enforcement of contracts imposes significant overhead
and delays discovery of faulty components to run-time.
To overcome these issues, we present soft contract
verification, which aims to statically prove either
complete or partial contract correctness of components,
written in an untyped, higher-order language with
first-class contracts. Our approach uses higher-order
symbolic execution, leveraging contracts as a source of
symbolic values including unknown behavioral values,
and employs an updatable heap of contract invariants to
reason about flow-sensitive facts. We prove the
symbolic execution soundly approximates the dynamic
semantics and that verified programs can't be blamed.
The approach is able to analyze first-class contracts,
recursive data structures, unknown functions, and
control-flow-sensitive refinements of values, which are
all idiomatic in dynamic languages. It makes effective
use of an off-the-shelf solver to decide problems
without heavy encodings. The approach is competitive
with a wide range of existing tools --- including type
systems, flow analyzers, and model checkers --- on
their own benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Ramsey:2014:THD,
author = "Norman Ramsey",
title = "On teaching {{\em How to Design Programs}}: observations
from a newcomer",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "153--166",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628137",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a personal, qualitative case study
of a first course using How to Design Programs and its
functional teaching languages. The paper
reconceptualizes the book's six-step design process as
an eight-step design process ending in a new ``review
and refactor'' step. It recommends specific approaches
to students' difficulties with function descriptions,
function templates, data examples, and other parts of
the design process. It connects the process to
interactive ``world programs.'' It recounts
significant, informative missteps in course design and
delivery. Finally, it identifies some unsolved teaching
problems and some potential solutions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Ohori:2014:SIP,
author = "Atsushi Ohori and Katsuhiro Ueno and Kazunori Hoshi
and Shinji Nozaki and Takashi Sato and Tasuku Makabe
and Yuki Ito",
title = "{SML\#} in industry: a practical {ERP} system
development",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "167--173",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628164",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper reports on our industry-academia project of
using a functional language in business software
production. The general motivation behind the project
is our ultimate goal of adopting an ML-style
higher-order typed functional language in a wide range
of ordinary software development in industry. To probe
the feasibility and identify various practical problems
and needs, we have conducted a 15 month pilot project
for developing an enterprise resource planning (ERP)
system in SML\#. The project has successfully completed
as we have planned, demonstrating the feasibility of
SML\#. In particular, seamless integration of SQL and
direct C language interface are shown to be useful in
reliable and efficient development of a data intensive
business application. During the program development,
we have found several useful functional programming
patterns and a number of possible extensions of an
ML-style language with records. This paper reports on
the project details and the lessons learned from the
project.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Mulligan:2014:LRE,
author = "Dominic P. Mulligan and Scott Owens and Kathryn E.
Gray and Tom Ridge and Peter Sewell",
title = "{Lem}: reusable engineering of real-world semantics",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "175--188",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628143",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent years have seen remarkable successes in
rigorous engineering: using mathematically rigorous
semantic models (not just idealised calculi) of
real-world processors, programming languages,
protocols, and security mechanisms, for testing, proof,
analysis, and design. Building these models is
challenging, requiring experimentation, dialogue with
vendors or standards bodies, and validation; their
scale adds engineering issues akin to those of
programming to the task of writing clear and usable
mathematics. But language and tool support for
specification is lacking. Proof assistants can be used
but bring their own difficulties, and a model produced
in one, perhaps requiring many person-years effort and
maintained over an extended period, cannot be used by
those familiar with another. We introduce Lem, a
language for engineering reusable large-scale semantic
models. The Lem design takes inspiration both from
functional programming languages and from proof
assistants, and Lem definitions are translatable into
OCaml for testing, Coq, HOL4, and Isabelle/HOL for
proof, and LaTeX and HTML for presentation. This
requires a delicate balance of expressiveness, careful
library design, and implementation of transformations
--- akin to compilation, but subject to the constraint
of producing usable and human-readable code for each
target. Lem's effectiveness is demonstrated by its use
in practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Breitner:2014:SZC,
author = "Joachim Breitner and Richard A. Eisenberg and Simon
Peyton Jones and Stephanie Weirich",
title = "Safe zero-cost coercions for {Haskell}",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "189--202",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628141",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Generative type abstractions --- present in Haskell,
OCaml, and other languages --- are useful concepts to
help prevent programmer errors. They serve to create
new types that are distinct at compile time but share a
run-time representation with some base type. We present
a new mechanism that allows for zero-cost conversions
between generative type abstractions and their
representations, even when such types are deeply
nested. We prove type safety in the presence of these
conversions and have implemented our work in GHC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
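
This mechanism shipped in GHC as the Coercible class and
Data.Coerce.coerce; the canonical use (a small example, assuming GHC 7.8
or later):

import Data.Coerce (coerce)

newtype Age = Age Int
  deriving Show

-- The coercion crosses the nested list structure at zero run-time cost:
-- no traversal and no allocation, unlike 'map Age'.
ages :: [Int] -> [Age]
ages = coerce

main :: IO ()
main = print (ages [1, 2, 3])
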
@Article{Pottier:2014:HME,
author = "Fran{\c{c}}ois Pottier",
title = "{Hindley--Milner} elaboration in applicative style:
functional pearl",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "203--212",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628145",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Type inference --- the problem of determining whether
a program is well-typed --- is well-understood. In
contrast, elaboration --- the task of constructing an
explicitly-typed representation of the program ---
seems to have received relatively little attention,
even though, in a non-local type inference system, it
is non-trivial. We show that the constraint-based
presentation of Hindley--Milner type inference can be
extended to deal with elaboration, while preserving its
elegance. This involves introducing a new notion of
``constraint with a value'', which forms an applicative
functor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
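
The paper's ``constraint with a value'' admits a compact rendering (a
hypothetical Haskell sketch; the paper works in OCaml, and 'Constraint'
and 'Solution' here are simplified stand-ins):

-- 'C' pairs a type-inference constraint with a function that reads the
-- elaborated value back out of a solution to that constraint.
data Constraint = Truth | Conj Constraint Constraint
type Solution = ()   -- stands in for a substitution/assignment

data C a = C Constraint (Solution -> a)

instance Functor C where
  fmap f (C c k) = C c (f . k)

-- The Applicative instance conjoins constraints and pairs the readers,
-- so constraint generation and elaboration proceed in lockstep.
instance Applicative C where
  pure x = C Truth (const x)
  C c1 k1 <*> C c2 k2 = C (Conj c1 c2) (\s -> k1 s (k2 s))

main :: IO ()
main = let C _ readBack = (+) <$> pure (1 :: Int) <*> pure 2
       in print (readBack ())   -- 3, read from the (trivial) solution
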
@Article{Winograd-Cort:2014:SNI,
author = "Daniel Winograd-Cort and Paul Hudak",
title = "Settable and non-interfering signal functions for
{FRP}: how a first-order switch is more than enough",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "213--225",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628140",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Functional Reactive Programming (FRP) provides a
method for programming continuous, reactive systems by
utilizing signal functions that, abstractly, transform
continuous input signals into continuous output
signals. These signals may also be streams of events,
and indeed, by allowing signal functions themselves to
be the values carried by these events (in essence,
signals of signal functions), one can conveniently make
discrete changes in program behavior by ``switching''
into and out of these signal functions. This
higher-order notion of switching is common among many
FRP systems, in particular those based on arrows, such
as Yampa. Although convenient, the power of switching
is often overkill and can pose problems for certain
types of program optimization (such as causal
commutative arrows [14]), as it causes the structure of
the program to change dynamically at run-time. Without
a notion of just-in-time compilation or related idea,
which itself is beset with problems, such optimizations
are not possible at compile time. This paper introduces
two new ideas that obviate, in a predominance of cases,
the need for switching. The first is a non-interference
law for arrows with choice that allows an arrowized FRP
program to dynamically alter its own structure (within
statically limited bounds) as well as abandon unused
streams. The other idea is a notion of a settable
signal function that allows a signal function to
capture its present state and later be restarted from
some previous state. With these two features, canonical
uses of higher-order switchers can be replaced with a
suitable first-order design, thus enabling a broader
range of static optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Chen:2014:FPD,
author = "Yan Chen and Umut A. Acar and Kanat Tangwongsan",
title = "Functional programming for dynamic and large data with
self-adjusting computation",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "227--240",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628150",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Combining type theory, language design, and empirical
work, we present techniques for computing with large
and dynamically changing datasets. Based on lambda
calculus, our techniques are suitable for expressing a
diverse set of algorithms on large datasets and, via
self-adjusting computation, enable computations to
respond automatically to changes in their data. To
improve the scalability of self-adjusting computation,
we present a type system for precise dependency
tracking that minimizes the time and space for storing
dependency metadata. The type system eliminates an
important assumption of prior work that can lead to
recording spurious dependencies. We present a
type-directed translation algorithm that generates
correct self-adjusting programs without relying on this
assumption. We then show a probabilistic-chunking
technique to further decrease space usage by
controlling the fundamental space-time tradeoff in
self-adjusting computation. We implement and evaluate
these techniques, showing promising results on
challenging benchmarks involving large graphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Weirich:2014:DT,
author = "Stephanie Weirich",
title = "Depending on types",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "241--241",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2631168",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Is Haskell a dependently typed programming language?
Should it be? GHC's many type-system features, such as
Generalized Algebraic Datatypes (GADTs), datatype
promotion, multiparameter type classes, and type
families, give programmers the ability to encode
domain-specific invariants in their types. Clever
Haskell programmers have used these features to enhance
the reasoning capabilities of static type checking. But
really, how far have we come? Could we do more? In this
talk, I will discuss dependently typed programming in
Haskell, through examples, analysis and comparisons
with modern full-spectrum dependently typed languages,
such as Coq, Agda and Idris. What sorts of dependently
typed programming can be done in Haskell now? What
could GHC learn from these languages? Conversely, what
lessons can GHC offer in return?",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
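
A standard instance of the GHC features the talk surveys (a sketch, not
from the talk itself): a GADT of length-indexed vectors, where datatype
promotion lets the type of 'vhead' rule out the empty case at compile
time.

{-# LANGUAGE DataKinds, GADTs, KindSignatures #-}

data Nat = Zero | Succ Nat

-- A list whose length is tracked in its type.
data Vec (n :: Nat) a where
  Nil  :: Vec 'Zero a
  Cons :: a -> Vec n a -> Vec ('Succ n) a

-- 'vhead Nil' is a type error, not a run-time error.
vhead :: Vec ('Succ n) a -> a
vhead (Cons x _) = x

main :: IO ()
main = print (vhead (Cons (1 :: Int) Nil))
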
@Article{Angiuli:2014:HPT,
author = "Carlo Angiuli and Edward Morehouse and Daniel R.
Licata and Robert Harper",
title = "Homotopical patch theory",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "243--256",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628158",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Homotopy type theory is an extension of Martin-L{\"o}f
type theory, based on a correspondence with homotopy
theory and higher category theory. In homotopy type
theory, the propositional equality type becomes
proof-relevant, and corresponds to paths in a space.
This allows for a new class of datatypes, called higher
inductive types, which are specified by constructors
not only for points but also for paths. In this paper,
we consider a programming application of higher
inductive types. Version control systems such as Darcs
are based on the notion of patches --- syntactic
representations of edits to a repository. We show how
patch theory can be developed in homotopy type theory.
Our formulation separates formal theories of patches
from their interpretation as edits to repositories. A
patch theory is presented as a higher inductive type.
Models of a patch theory are given by maps out of that
type, which, being functors, automatically preserve the
structure of patches. Several standard tools of
homotopy theory come into play, demonstrating the use
of these methods in a practical programming context.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Cockx:2014:PMK,
author = "Jesper Cockx and Dominique Devriese and Frank
Piessens",
title = "Pattern matching without {K}",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "257--268",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628139",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "Dependent pattern matching is an intuitive way to
write programs and proofs in dependently typed
languages. It is reminiscent of both pattern matching
in functional languages and case analysis in on-paper
mathematics. However, in general it is incompatible
with new type theories such as homotopy type theory
(HoTT). As a consequence, proofs in such theories are
typically harder to write and to understand. The source
of this incompatibility is the reliance of dependent
pattern matching on the so-called K axiom --- also
known as the uniqueness of identity proofs --- which is
inadmissible in HoTT. The Agda language supports an
experimental criterion to detect definitions by pattern
matching that make use of the K axiom, but so far it
lacked a formal correctness proof. In this paper, we
propose a new criterion for dependent pattern matching
without K, and prove it correct by a translation to
eliminators in the style of Goguen et al. (2006). Our
criterion both allows more good definitions than
existing proposals, and solves a previously undetected
problem in the criterion offered by Agda. It has been
implemented in Agda and is the first to be supported by
a formal proof. Thus it brings the benefits of
dependent pattern matching to contexts where we cannot
assume K, such as HoTT. It also points the way to new
forms of dependent pattern matching, for example on
higher inductive types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
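
For reference, the K axiom at the centre of this paper is Streicher's
eliminator (a standard Agda-style statement, transcribed here in LaTeX
notation):

% Axiom K, equivalently ``uniqueness of identity proofs'': every proof
% p : x \equiv x may be treated as if it were refl. Dependent pattern
% matching silently appeals to it; HoTT refutes it.
K : \{A : \mathsf{Set}\}\ \{x : A\}\ (P : x \equiv x \to \mathsf{Set})
    \to P\;\mathsf{refl} \to (p : x \equiv x) \to P\;p
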
@Article{Vazou:2014:RTH,
author = "Niki Vazou and Eric L. Seidel and Ranjit Jhala and
Dimitrios Vytiniotis and Simon Peyton-Jones",
title = "Refinement types for {Haskell}",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "269--282",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628161",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "SMT-based checking of refinement types for
call-by-value languages is a well-studied subject.
Unfortunately, the classical translation of refinement
types to verification conditions is unsound under lazy
evaluation. When checking an expression, such systems
implicitly assume that all the free variables in the
expression are bound to values. This property is
trivially guaranteed by eager, but does not hold under
lazy, evaluation. Thus, to be sound and precise, a
refinement type system for Haskell and the
corresponding verification conditions must take into
account which subset of binders actually reduces to
values. We present a stratified type system that labels
binders as potentially diverging or not, and that
(circularly) uses refinement types to verify the
labeling. We have implemented our system in LiquidHaskell
and present an experimental evaluation of our
approach on more than 10,000 lines of widely used
Haskell libraries. We show that LiquidHaskell is
able to prove 96\% of all recursive functions
terminating, while requiring a modest 1.7 lines of
termination-annotations per 100 lines of code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
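
In LiquidHaskell the refinements described here are written in special
comment pragmas next to ordinary Haskell; a small sketch of the idea
(the annotation is paraphrased informally rather than in exact pragma
syntax):

-- Refinement, informally:  safeDiv :: Int -> {v:Int | v /= 0} -> Int
-- The checker must prove every second argument non-zero at compile
-- time; under lazy evaluation it must additionally prove, as this
-- paper shows, that the binder actually reduces to a value.
safeDiv :: Int -> Int -> Int
safeDiv n d = n `div` d

main :: IO ()
main = print (safeDiv 10 2)   -- accepted: the divisor is provably non-zero
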
@Article{Schwerter:2014:TGE,
author = "Felipe Ba{\~n}ados Schwerter and Ronald Garcia and
{\'E}ric Tanter",
title = "A theory of gradual effect systems",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "283--295",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628149",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Effect systems have the potential to help software
developers, but their practical adoption has been very
limited. We conjecture that this limited adoption is
due in part to the difficulty of transitioning from a
system where effects are implicit and unrestricted to a
system with a static effect discipline, which must
settle for conservative checking in order to be
decidable. To address this hindrance, we develop a
theory of gradual effect checking, which makes it
possible to incrementally annotate and statically check
effects, while still rejecting statically inconsistent
programs. We extend the generic type-and-effect
framework of Marino and Millstein with a notion of
unknown effects, which turns out to be significantly
more subtle than unknown types in traditional gradual
typing. We appeal to abstract interpretation to develop
and validate the concepts of gradual effect checking.
We also demonstrate how an effect system formulated in
Marino and Millstein's framework can be automatically
extended to support gradual checking.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{McBride:2014:HKY,
author = "Conor Thomas McBride",
title = "How to keep your neighbours in order",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "297--309",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628163",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "I present a datatype-generic treatment of recursive
container types whose elements are guaranteed to be
stored in increasing order, with the ordering invariant
rolled out systematically. Intervals, lists and binary
search trees are instances of the generic treatment. On
the journey to this treatment, I report a variety of
failed experiments and the transferable learning
experiences they triggered. I demonstrate that a total
element ordering is enough to deliver insertion and
flattening algorithms, and show that (with care about
the formulation of the types) the implementations
remain as usual. Agda's instance arguments and pattern
synonyms maximize the proof search done by the
typechecker and minimize the appearance of proofs in
program text, often eradicating them entirely.
Generalizing to indexed recursive container types,
invariants such as size and balance can be expressed in
addition to ordering. By way of example, I implement
insertion and deletion for 2-3 trees, ensuring both
order and balance by the discipline of type checking.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Kaki:2014:RFH,
author = "Gowtham Kaki and Suresh Jagannathan",
title = "A relational framework for higher-order shape
analysis",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "311--324",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628159",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose the integration of a relational
specification framework within a dependent type system
capable of verifying complex invariants over the shapes
of algebraic datatypes. Our approach is based on the
observation that structural properties of such
datatypes can often be naturally expressed as
inductively-defined relations over the recursive
structure evident in their definitions. By interpreting
constructor applications (abstractly) in a relational
domain, we can define expressive relational
abstractions for a variety of complex data structures,
whose structural and shape invariants can be
automatically verified. Our specification language also
allows for definitions of parametric relations for
polymorphic data types that enable highly composable
specifications and naturally generalizes to
higher-order polymorphic functions. We describe an
algorithm that translates relational specifications
into a decidable fragment of first-order logic that can
be efficiently discharged by an SMT solver. We have
implemented these ideas in a type checker called
CATALYST that is incorporated within the MLton SML
compiler. Experimental results and case studies
indicate that our verification strategy is both
practical and effective.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Marlow:2014:TNF,
author = "Simon Marlow and Louis Brandy and Jonathan Coens and
Jon Purdy",
title = "There is no fork: an abstraction for efficient,
concurrent, and concise data access",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "325--337",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628144",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a new programming idiom for concurrency,
based on Applicative Functors, where concurrency is
implicit in the Applicative $ < * > $ operator. The
result is that concurrent programs can be written in a
natural applicative style, and they retain a high
degree of clarity and modularity while executing with
maximal concurrency. This idiom is particularly useful
for programming against external data sources, where
the application code is written without the use of
explicit concurrency constructs, while the
implementation is able to batch together multiple
requests for data from the same source, and fetch data
from multiple sources concurrently. Our abstraction
uses a cache to ensure that multiple requests for the
same data return the same result, which frees the
programmer from having to arrange to fetch data only
once, which in turn leads to greater modularity. While
it is generally applicable, our technique was designed
with a particular application in mind: an internal
service at Facebook that identifies particular types of
content and takes actions based on it. Our application
has a large body of business logic that fetches data
from several different external sources. The framework
described in this paper enables the business logic to
execute efficiently by automatically fetching data
concurrently; we present some preliminary results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
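
The core of the abstraction (later released as the Haxl library) is
small enough to sketch; this simplified rendering (illustrative, with
'Request' as a stand-in for real data-source requests) shows how <*>
merges the pending requests of both sides into one batch:

type Request = String

data Fetch a
  = Done a                        -- finished
  | Blocked [Request] (Fetch a)   -- blocked on a batch of requests

instance Functor Fetch where
  fmap f (Done x)       = Done (f x)
  fmap f (Blocked rs c) = Blocked rs (fmap f c)

instance Applicative Fetch where
  pure = Done
  Done f        <*> x             = fmap f x
  Blocked rs c  <*> Done x        = Blocked rs (c <*> Done x)
  Blocked rs c  <*> Blocked rs' d = Blocked (rs ++ rs') (c <*> d)

get :: Request -> Fetch Request
get r = Blocked [r] (Done r)

main :: IO ()
main = case (,) <$> get "a" <*> get "b" of
  Blocked rs _ -> print rs   -- ["a","b"]: both requests in one round
  Done _       -> pure ()

A real implementation pairs each request with a mutable result slot and
adds a Monad instance; the batching, as above, lives entirely in <*>.
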
@Article{Gibbons:2014:FDS,
author = "Jeremy Gibbons and Nicolas Wu",
title = "Folding domain-specific languages: deep and shallow
embeddings (functional pearl)",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "339--347",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628138",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A domain-specific language can be implemented by
embedding within a general-purpose host language. This
embedding may be deep or shallow, depending on whether
terms in the language construct syntactic or semantic
representations. The deep and shallow styles are
closely related, and intimately connected to folds; in
this paper, we explore that connection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
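
The deep/shallow connection the pearl explores is visible even in a toy
expression language (a sketch, not taken from the paper):

-- Deep embedding: terms build syntax; the semantics is a fold over it.
data Expr = Lit Int | Add Expr Expr

evalDeep :: Expr -> Int
evalDeep (Lit n)   = n
evalDeep (Add x y) = evalDeep x + evalDeep y

-- Shallow embedding: terms *are* their semantics; each construct is
-- interpreted immediately. The interpretations form the fold's algebra.
type ExprS = Int

litS :: Int -> ExprS
litS = id

addS :: ExprS -> ExprS -> ExprS
addS = (+)

main :: IO ()
main = print ( evalDeep (Add (Lit 1) (Lit 2))   -- 3, via the fold
             , addS (litS 1) (litS 2) )         -- 3, directly
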
@Article{Fredriksson:2014:KNS,
author = "Olle Fredriksson and Dan R. Ghica",
title = "{Krivine} nets: a semantic foundation for distributed
execution",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "349--361",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628152",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We define a new approach to compilation to distributed
architectures based on networks of abstract machines.
Using it we can implement a generalised and fully
transparent form of Remote Procedure Call that supports
calling higher-order functions across node boundaries,
without sending actual code. Our starting point is the
classic Krivine machine, which implements reduction for
untyped call-by-name PCF. We successively add the
features that we need for distributed execution and
show the correctness of each addition. Then we
construct a two-level operational semantics, where the
high level is a network of communicating machines, and
the low level is given by local machine transitions.
Using these networks, we arrive at our final system,
the Krivine Net. We show that Krivine Nets give a
correct distributed implementation of the Krivine
machine, which preserves both termination and
non-termination properties. All the technical results
have been formalised and proved correct in Agda. We
also implement a prototype compiler which we compare
with previous distributing compilers based on Girard's
Geometry of Interaction and on Game Semantics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
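
The classic Krivine machine the paper starts from fits in a few lines (a
textbook rendering for closed de Bruijn terms, not the paper's Agda
formalisation):

-- Call-by-name Krivine machine over de Bruijn terms.
data Term = Var Int | Lam Term | App Term Term
  deriving Show

data Closure = Closure Term [Closure]   -- a term with its environment

-- One transition: a focused closure and a stack of pending arguments.
step :: (Closure, [Closure]) -> Maybe (Closure, [Closure])
step (Closure (App t u) env, stack) =
  Just (Closure t env, Closure u env : stack)   -- push argument, unevaluated
step (Closure (Lam t) env, c : stack) =
  Just (Closure t (c : env), stack)             -- bind top of the stack
step (Closure (Var n) env, stack) =
  Just (env !! n, stack)                        -- enter the suspended argument
step _ = Nothing                                -- Lam with empty stack: done

run :: Term -> Closure
run t = go (Closure t [], [])
  where go s = maybe (fst s) go (step s)

main :: IO ()
main = case run (App (Lam (Var 0)) (Lam (Var 0))) of
  Closure t _ -> print t   -- Lam (Var 0)

The paper's move, roughly, is to split states like these across nodes,
so that the stack and environment may refer to closures held on other
machines.
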
@Article{Accattoli:2014:DAM,
author = "Beniamino Accattoli and Pablo Barenbaum and Damiano
Mazza",
title = "Distilling abstract machines",
journal = j-SIGPLAN,
volume = "49",
number = "9",
pages = "363--376",
month = sep,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692915.2628154",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is well-known that many environment-based abstract
machines can be seen as strategies in lambda calculi
with explicit substitutions (ES). Recently, graphical
syntaxes and linear logic led to the linear
substitution calculus (LSC), a new approach to ES that
is halfway between small-step calculi and traditional
calculi with ES. This paper studies the relationship
between the LSC and environment-based abstract
machines. While traditional calculi with ES simulate
abstract machines, the LSC rather distills them: some
transitions are simulated while others vanish, as they
map to a notion of structural congruence. The
distillation process unveils that abstract machines in
fact implement weak linear head reduction, a notion of
evaluation having a central role in the theory of
linear logic. We show that such a pattern applies
uniformly in call-by-name, call-by-value, and
call-by-need, catching many machines in the literature.
We start by distilling the KAM, the CEK, and a sketch
of the ZINC, and then provide simplified versions of
the SECD, the lazy KAM, and Sestoft's machine. Along
the way we also introduce some new machines with global
environments. Moreover, we show that distillation
preserves the time complexity of the executions, i.e.
the LSC is a complexity-preserving abstraction of
abstract machines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '14 conference proceedings.",
}
@Article{Feldthaus:2014:CCT,
author = "Asger Feldthaus and Anders M{\o}ller",
title = "Checking correctness of {TypeScript} interfaces for
{JavaScript} libraries",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "1--16",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660215",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The TypeScript programming language adds optional
types to JavaScript, with support for interaction with
existing JavaScript libraries via interface
declarations. Such declarations have been written for
hundreds of libraries, but they can be difficult to
write and often contain errors, which may affect the
type checking and misguide code completion for the
application code in IDEs. We present a pragmatic
approach to check correctness of TypeScript declaration
files with respect to JavaScript library
implementations. The key idea in our algorithm is that
many declaration errors can be detected by an analysis
of the library initialization state combined with a
light-weight static analysis of the library function
code. Our experimental results demonstrate the
effectiveness of the approach: it has found 142 errors
in the declaration files of 10 libraries, with an
analysis time of a few minutes per library and with a
low number of false positives. Our analysis of how
programmers use library interface declarations
furthermore reveals some practical limitations of the
TypeScript type system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Andreasen:2014:DSA,
author = "Esben Andreasen and Anders M{\o}ller",
title = "Determinacy in static analysis for {jQuery}",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "17--31",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660214",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Static analysis for JavaScript can potentially help
programmers find errors early during development.
Although much progress has been made on analysis
techniques, a major obstacle is the prevalence of
libraries, in particular jQuery, which apply
programming patterns that have detrimental consequences
on the analysis precision and performance. Previous
work on dynamic determinacy analysis has demonstrated
how information about program expressions that always
resolve to a fixed value in some call context may lead
to significant scalability improvements of static
analysis for such code. We present a static dataflow
analysis for JavaScript that infers and exploits
determinacy information on-the-fly, to enable analysis
of some of the most complex parts of jQuery. The
analysis combines selective context and path
sensitivity, constant propagation, and branch pruning,
based on a systematic investigation of the main causes
of analysis imprecision when using a more basic
analysis. The techniques are implemented in the TAJS
analysis tool and evaluated on a collection of small
programs that use jQuery. Our results show that the
proposed analysis techniques boost both precision and
performance, specifically for inferring type
information and call graphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Pradel:2014:EAR,
author = "Michael Pradel and Parker Schuh and George Necula and
Koushik Sen",
title = "{EventBreak}: analyzing the responsiveness of user
interfaces through performance-guided test generation",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "33--47",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660233",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Event-driven user interface applications typically
have a single thread of execution that processes event
handlers in response to input events triggered by the
user, the network, or other applications. Programmers
must ensure that event handlers terminate after a short
amount of time because otherwise, the application may
become unresponsive. This paper presents EventBreak, a
performance-guided test generation technique to
identify and analyze event handlers whose execution
time may gradually increase while using the
application. The key idea is to systematically search
for pairs of events where triggering one event
increases the execution time of the other event. For
example, this situation may happen because one event
accumulates data that is processed by the other event.
We implement the approach for JavaScript-based web
applications and apply it to three real-world
applications. EventBreak discovers events with an
execution time that gradually increases in an unbounded
way, which makes the application unresponsive, and
events that, if triggered repeatedly, reveal a severe
scalability problem, which makes the application
unusable. The approach reveals two known bugs and four
previously unknown responsiveness problems.
Furthermore, we show that EventBreak helps in testing
that event handlers avoid such problems by bounding a
handler's execution time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Hsiao:2014:UWC,
author = "Chun-Hung Hsiao and Michael Cafarella and Satish
Narayanasamy",
title = "Using web corpus statistics for program analysis",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "49--65",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660226",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Several program analysis tools --- such as plagiarism
detection and bug finding --- rely on knowing a piece
of code's relative semantic importance. For example, a
plagiarism detector should not bother reporting two
programs that have an identical simple loop counter
test, but should report programs that share more
distinctive code. Traditional program analysis
techniques ( e.g., finding data and control
dependencies) are useful, but do not say how surprising
or common a line of code is. Natural language
processing researchers have encountered a similar
problem and addressed it using an n-gram model of text
frequency, derived from statistics computed over text
corpora. We propose and compute an n-gram model for
programming languages, computed over a corpus of 2.8
million JavaScript programs we downloaded from the Web.
In contrast to previous techniques, we describe a code
n-gram as a subgraph of the program dependence graph
that contains all nodes and edges reachable in n steps
from the statement. We can count n-grams in a program
and count the frequency of n-grams in the corpus,
enabling us to compute tf-idf-style measures that
capture the differing importance of different lines of
code. We demonstrate the power of this approach by
implementing a plagiarism detector with accuracy that
beats previous techniques, and a bug-finding tool that
discovered over a dozen previously unknown bugs in a
collection of real deployed programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
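
The tf-idf-style weighting being adapted is the standard corpus
statistic, here over dependence-graph n-grams rather than words; a
minimal rendering (illustrative, with hypothetical inputs):

import qualified Data.Map.Strict as M

-- Term frequency in one document, scaled by how rare the term is across
-- a corpus of n documents ('df' maps terms to document frequency).
tfIdf :: Ord t => M.Map t Int -> Int -> M.Map t Int -> t -> Double
tfIdf tf n df t = termFreq * invDocFreq
  where
    termFreq   = fromIntegral (M.findWithDefault 0 t tf)
    invDocFreq = log (fromIntegral n
                      / fromIntegral (1 + M.findWithDefault 0 t df))

main :: IO ()
main = print (tfIdf (M.fromList [("loop-counter", 3)]) 1000
                    (M.fromList [("loop-counter", 900)]) "loop-counter")
-- A ubiquitous n-gram such as a simple loop-counter test scores low.
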
@Article{Barr:2014:TAT,
author = "Earl T. Barr and Mark Marron",
title = "{Tardis}: affordable time-travel debugging in managed
runtimes",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "67--82",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660209",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Developers who set a breakpoint a few statements too
late or who are trying to diagnose a subtle bug from a
single core dump often wish for a time-traveling
debugger. The ability to rewind time to see the exact
sequence of statements and program values leading to an
error has great intuitive appeal but, due to large time
and space overheads, time traveling debuggers have seen
limited adoption. A managed runtime, such as the Java
JVM or a JavaScript engine, has already paid much of
the cost of providing core features --- type safety,
memory management, and virtual IO --- that can be
reused to implement a low overhead time-traveling
debugger. We leverage this insight to design and build
affordable time-traveling debuggers for managed
languages. Tardis realizes our design: it provides
affordable time-travel with an average overhead of only
7\% during normal execution, a rate of 0.6MB/s of
history logging, and a worst-case 0.68s time-travel
latency on our benchmark applications. Tardis can also
debug optimized code using time-travel to reconstruct
state. This capability, coupled with its low overhead,
makes Tardis suitable for use as the default debugger
for managed languages, promising to bring
time-traveling debugging into the mainstream and
transform the practice of debugging.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Bell:2014:PID,
author = "Jonathan Bell and Gail Kaiser",
title = "{Phosphor}: illuminating dynamic data flow in
commodity {JVMs}",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "83--101",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660212",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Dynamic taint analysis is a well-known information
flow analysis problem with many possible applications.
Taint tracking allows for analysis of application data
flow by assigning labels to data, and then propagating
those labels through data flow. Taint tracking systems
traditionally compromise among performance, precision,
soundness, and portability. Performance can be
critical, as these systems are often intended to be
deployed to production environments, and hence must
have low overhead. To be deployed in security-conscious
settings, taint tracking must also be sound and
precise. Dynamic taint tracking must be portable in
order to be easily deployed and adopted for real world
purposes, without requiring recompilation of the
operating system or language interpreter, and without
requiring access to application source code. We present
Phosphor, a dynamic taint tracking system for the Java
Virtual Machine (JVM) that simultaneously achieves our
goals of performance, soundness, precision, and
portability. Moreover, to our knowledge, it is the
first portable general purpose taint tracking system
for the JVM. We evaluated Phosphor 's performance on
two commonly used JVM languages (Java and Scala), on
two successive revisions of two commonly used JVMs
(Oracle's HotSpot and OpenJDK's IcedTea) and on
Android's Dalvik Virtual Machine, finding its
performance to be impressive: as low as 3\% (53\% on
average; 220\% at worst) using the DaCapo macro
benchmark suite. This paper describes our approach
toward achieving portable taint tracking in the JVM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Pina:2014:RDJ,
author = "Lu{\'\i}s Pina and Lu{\'\i}s Veiga and Michael Hicks",
title = "{Rubah}: {DSU} for {Java} on a stock {JVM}",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "103--119",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660220",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents Rubah, the first dynamic software
updating system for Java that: is portable, implemented
via libraries and bytecode rewriting on top of a
standard JVM; is efficient, imposing essentially no
overhead on normal, steady-state execution; is
flexible, allowing nearly arbitrary changes to classes
between updates; and is non-disruptive, employing
either a novel eager algorithm that transforms the
program state with multiple threads, or a novel lazy
algorithm that transforms objects as they are demanded,
post-update. Requiring little programmer effort, Rubah
has been used to dynamically update five long-running
applications: the H2 database, the Voldemort key-value
store, the Jake2 implementation of the Quake 2 shooter
game, the CrossFTP server, and the JavaEmailServer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Shahriyar:2014:FCG,
author = "Rifat Shahriyar and Stephen M. Blackburn and Kathryn
S. McKinley",
title = "Fast conservative garbage collection",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "121--139",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660198",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Garbage collectors are exact or conservative. An exact
collector identifies all references precisely and may
move referents and update references, whereas a
conservative collector treats one or more of stack,
register, and heap references as ambiguous. Ambiguous
references constrain collectors in two ways. (1) Since
they may be pointers, the collectors must retain
referents. (2) Since they may be values, the collectors
cannot modify them, pinning their referents. We explore
conservative collectors for managed languages, with
ambiguous stacks and registers. We show that for Java
benchmarks they retain and pin remarkably few heap
objects: $ < 0.01 \% $ are falsely retained and 0.03\%
are pinned. The larger effect is collector design.
Prior conservative collectors (1) use mark-sweep and
unnecessarily forgo moving all objects, or (2) use
mostly copying and pin entire pages. Compared to
generational collection, overheads are substantial:
12\% and 45\% respectively. We introduce high
performance conservative Immix and reference counting
(RC). Immix is a mark-region collector with fine line-grain
pinning and opportunistic copying of unambiguous
referents. Deferred RC simply needs an object map to
deliver the first conservative RC. We implement six
exact collectors and their conservative counterparts.
Conservative Immix and RC come within 2 to 3\% of their
exact counterparts. In particular, conservative RC
Immix is slightly faster than a well-tuned exact
generational collector. These findings show that for
managed languages, conservative collection is
compatible with high performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Holk:2014:RBM,
author = "Eric Holk and Ryan Newton and Jeremy Siek and Andrew
Lumsdaine",
title = "Region-based memory management for {GPU} programming
languages: enabling rich data structures on a spartan
host",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "141--155",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660244",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graphics processing units (GPUs) can effectively
accelerate many applications, but their applicability
has been largely limited to problems whose solutions
can be expressed neatly in terms of linear algebra.
Indeed, most GPU programming languages limit the user
to simple data structures --- typically only
multidimensional rectangular arrays of scalar values.
Many algorithms are more naturally expressed using
higher level language features, such as algebraic data
types (ADTs) and first class procedures, yet building
these structures in a manner suitable for a GPU remains
a challenge. We present a region-based memory
management approach that enables rich data structures
in Harlan, a language for data parallel computing.
Regions enable rich data structures by providing a
uniform representation for pointers on both the CPU and
GPU and by providing a means of transferring entire
data structures between CPU and GPU memory. We
demonstrate Harlan's increased expressiveness on
several example programs and show that Harlan performs
well on more traditional data-parallel problems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Uhler:2014:SSB,
author = "Richard Uhler and Nirav Dave",
title = "{Smten} with satisfiability-based search",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "157--176",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660208",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Satisfiability (SAT) and Satisfiability Modulo
Theories (SMT) have been used in solving a wide variety
of important and challenging problems, including
automatic test generation, model checking, and program
synthesis. For these applications to scale to larger
problem instances, developers cannot rely solely on the
sophistication of SAT and SMT solvers to efficiently
solve their queries; they must also optimize their own
orchestration and construction of queries. We present
Smten, a high-level language for orchestrating and
constructing satisfiability-based search queries. We
show that applications developed using Smten require
significantly fewer lines of code and less developer
effort to achieve results comparable to standard
SMT-based tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Bosboom:2014:SCC,
author = "Jeffrey Bosboom and Sumanaruban Rajadurai and Weng-Fai
Wong and Saman Amarasinghe",
title = "{StreamJIT}: a commensal compiler for high-performance
stream programming",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "177--195",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660236",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There are many domain libraries, but despite the
performance benefits of compilation, domain-specific
languages are comparatively rare due to the high cost
of implementing an optimizing compiler. We propose
commensal compilation, a new strategy for compiling
embedded domain-specific languages by reusing the
massive investment in modern language virtual machine
platforms. Commensal compilers use the host language's
front-end, use host platform APIs that enable back-end
optimizations by the host platform JIT, and use an
autotuner for optimization selection. The cost of
implementing a commensal compiler is only the cost of
implementing the domain-specific optimizations. We
demonstrate the concept by implementing a commensal
compiler for the stream programming language StreamJIT
atop the Java platform. Our compiler achieves
performance 2.8 times better than the StreamIt native
code (via GCC) compiler with considerably less
implementation effort.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Tosch:2014:SPA,
author = "Emma Tosch and Emery D. Berger",
title = "{SurveyMan}: programming and automatically debugging
surveys",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "197--211",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660206",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Surveys can be viewed as programs, complete with
logic, control flow, and bugs. Word choice or the order
in which questions are asked can unintentionally bias
responses. Vague, confusing, or intrusive questions can
cause respondents to abandon a survey. Surveys can also
have runtime errors: inattentive respondents can taint
results. This effect is especially problematic when
deploying surveys in uncontrolled settings, such as on
the web or via crowdsourcing platforms. Because the
results of surveys drive business decisions and inform
scientific conclusions, it is crucial to make sure they
are correct. We present SurveyMan, a system for
designing, deploying, and automatically debugging
surveys. Survey authors write their surveys in a
lightweight domain-specific language aimed at end
users. SurveyMan statically analyzes the survey to
provide feedback to survey authors before deployment.
It then compiles the survey into JavaScript and deploys
it either to the web or a crowdsourcing platform.
SurveyMan's dynamic analyses automatically find survey
bugs, and control for the quality of responses. We
evaluate SurveyMan's algorithms analytically and
empirically, demonstrating its effectiveness with case
studies of social science surveys conducted via
Amazon's Mechanical Turk.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Bartenstein:2014:RTS,
author = "Thomas W. Bartenstein and Yu David Liu",
title = "Rate types for stream programs",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "213--232",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660225",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce RATE TYPES, a novel type system to reason
about and optimize data-intensive programs. Built
around stream languages, RATE TYPES performs static
quantitative reasoning about stream rates --- the
frequency of data items in a stream being consumed,
processed, and produced. Despite the fact that streams
are fundamentally dynamic, we find two essential
concepts of stream rate control --- throughput ratio
and natural rate --- are intimately related to the
program structure itself and can be effectively
reasoned about by a type system. RATE TYPES is proven
to correspond with a time-aware and parallelism-aware
operational semantics. The strong correspondence result
tolerates arbitrary schedules, and does not require any
synchronization between stream filters. We further
implement RATE TYPES, demonstrating its effectiveness
in predicting stream data rates in real-world stream
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Amin:2014:FPD,
author = "Nada Amin and Tiark Rompf and Martin Odersky",
title = "Foundations of path-dependent types",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "233--249",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660216",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A scalable programming language is one in which the
same concepts can describe small as well as large
parts. Towards this goal, Scala unifies concepts from
object and module systems. An essential ingredient of
this unification is the concept of objects with type
members, which can be referenced through path-dependent
types. Unfortunately, path-dependent types are not
well-understood, and have been a roadblock in grounding
the Scala type system on firm theory. We study several
calculi for path-dependent types. We present DOT which
captures the essence --- DOT stands for Dependent
Object Types. We explore the design space bottom-up,
teasing apart inherent from accidental complexities,
while fully mechanizing our models at each step. Even
in this simple setting, many interesting patterns arise
from the interaction of structural and nominal
features. Whereas our simple calculus enjoys many
desirable and intuitive properties, we demonstrate that
the theory gets much more complicated once we add
another Scala feature, type refinement, or extend the
subtyping relation to a lattice. We discuss possible
remedies and trade-offs in modeling type systems for
Scala-like languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Allende:2014:CGT,
author = "Esteban Allende and Johan Fabry and Ronald Garcia and
{\'E}ric Tanter",
title = "Confined gradual typing",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "251--270",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660222",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Gradual typing combines static and dynamic typing
flexibly and safely in a single programming language.
To do so, gradually typed languages implicitly insert
casts where needed, to ensure at runtime that typing
assumptions are not violated by untyped code. However,
the implicit nature of cast insertion, especially on
higher-order values, can jeopardize reliability and
efficiency: higher-order casts can fail at any time,
and are costly to execute. We propose Confined Gradual
Typing, which extends gradual typing with two new type
qualifiers that let programmers control the flow of
values between the typed and the untyped worlds, and
thereby trade some flexibility for more reliability and
performance. We formally develop two variants of
Confined Gradual Typing that capture different
flexibility/guarantee tradeoffs. We report on the
implementation of Confined Gradual Typing in
Gradualtalk, a gradually-typed Smalltalk, which
confirms the performance advantage of avoiding unwanted
higher-order casts and the low overhead of the
approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Altidor:2014:RJG,
author = "John Altidor and Yannis Smaragdakis",
title = "Refactoring {Java} generics by inferring wildcards, in
practice",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "271--290",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660203",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Wildcard annotations can improve the generality of
Java generic libraries, but require heavy manual
effort. We present an algorithm for refactoring and
inferring more general type instantiations of Java
generics using wildcards. Compared to past approaches,
our work is practical and immediately applicable: we
assume no changes to the Java type system, while taking
into account all its intricacies. Our system allows
users to select declarations (variables, method
parameters, return types, etc.) to generalize and
considers declarations not declared in available source
code. It then performs an inter-procedural flow
analysis and a method body analysis, in order to
generalize type signatures. We evaluate our technique
on six Java generic libraries. We find that 34\% of
available declarations of variant type signatures can
be generalized --- i.e., relaxed with more general
wildcard types. On average, 146 other declarations need
to be updated when a declaration is generalized,
showing that this refactoring would be too tedious and
error-prone to perform manually.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{David:2014:CMC,
author = "Florian David and Gael Thomas and Julia Lawall and
Gilles Muller",
title = "Continuously measuring critical section pressure with
the free-lunch profiler",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "291--307",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660210",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today, Java is regularly used to implement large
multi-threaded server-class applications that use locks
to protect access to shared data. However,
understanding the impact of locks on the performance of
a system is complex, and thus the use of locks can
impede the progress of threads on configurations that
were not anticipated by the developer, during specific
phases of the execution. In this paper, we propose Free
Lunch, a new lock profiler for Java application
servers, specifically designed to identify, in-vivo,
phases where the progress of the threads is impeded by
a lock. Free Lunch is designed around a new metric,
critical section pressure (CSP), which directly
correlates the progress of the threads to each of the
locks. Using Free Lunch, we have identified phases of
high CSP, which were hidden with other lock profilers,
in the distributed Cassandra NoSQL database and in
several applications from the DaCapo 9.12, the
SPECjvm2008 and the SPECjbb2005 benchmark suites. Our
evaluation of Free Lunch shows that its overhead is
never greater than 6\%, making it suitable for in-vivo
use.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Misailovic:2014:CRA,
author = "Sasa Misailovic and Michael Carbin and Sara Achour and
Zichao Qi and Martin C. Rinard",
title = "{Chisel}: reliability- and accuracy-aware optimization
of approximate computational kernels",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "309--328",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660231",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The accuracy of an approximate computation is the
distance between the result that the computation
produces and the corresponding fully accurate result.
The reliability of the computation is the probability
that it will produce an acceptably accurate result.
Emerging approximate hardware platforms provide
approximate operations that, in return for reduced
energy consumption and/or increased performance,
exhibit reduced reliability and/or accuracy. We present
Chisel, a system for reliability- and accuracy-aware
optimization of approximate computational kernels that
run on approximate hardware platforms. Given a combined
reliability and/or accuracy specification, Chisel
automatically selects approximate kernel operations to
synthesize an approximate computation that minimizes
energy consumption while satisfying its reliability and
accuracy specification. We evaluate Chisel on five
applications from the image processing, scientific
computing, and financial analysis domains. The
experimental results show that our implemented
optimization algorithm enables Chisel to optimize our
set of benchmark kernels to obtain energy savings from
8.7\% to 19.8\% compared to the fully reliable kernel
implementations while preserving important reliability
guarantees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Kambadur:2014:ESE,
author = "Melanie Kambadur and Martha A. Kim",
title = "An experimental survey of energy management across the
stack",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "329--344",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660196",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern demand for energy-efficient computation has
spurred research at all levels of the stack, from
devices to microarchitecture, operating systems,
compilers, and languages. Unfortunately, this breadth
has resulted in a disjointed space, with technologies
at different levels of the system stack rarely
compared, let alone coordinated. This work begins to
remedy the problem, conducting an experimental survey
of the present state of energy management across the
stack. Focusing on settings that are exposed to
software, we measure the total energy, average power,
and execution time of 41 benchmark applications in 220
configurations, across a total of 200,000 program
executions. Some of the more important findings of the
survey include that effective parallelization and
compiler optimizations have the potential to save far
more energy than Linux's frequency tuning algorithms;
that certain non-complementary energy strategies can
undercut each other's savings by half when combined;
and that while the power impacts of most strategies
remain constant across applications, the runtime
impacts vary, resulting in inconsistent energy
impacts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Pinto:2014:UEB,
author = "Gustavo Pinto and Fernando Castor and Yu David Liu",
title = "Understanding energy behaviors of thread management
constructs",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "345--360",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660235",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Java programmers are faced with numerous choices in
managing concurrent execution on multicore platforms.
These choices often have different trade-offs (e.g.,
performance, scalability, and correctness guarantees).
This paper analyzes an additional dimension, energy
consumption. It presents an empirical study aiming to
illuminate the relationship between the choices and
settings of thread management constructs and energy
consumption. We consider three important thread
management constructs in concurrent programming:
explicit thread creation, fixed-size thread pooling,
and work stealing. We further shed light on the
energy/performance trade-off of three ``tuning knobs''
of these constructs: the number of threads, the task
division strategy, and the characteristics of processed
data. Through an extensive experimental space
exploration over real-world Java programs, we produce a
list of findings about the energy behaviors of
concurrent programs, which are not always obvious. The
study serves as a first step toward improving energy
efficiency of concurrent programs on parallel
architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Drechsler:2014:DRU,
author = "Joscha Drechsler and Guido Salvaneschi and Ragnar Mogk
and Mira Mezini",
title = "Distributed {REScala}: an update algorithm for
distributed reactive programming",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "361--376",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660240",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reactive programming improves the design of reactive
applications by relocating the logic for managing
dependencies between dependent values away from the
application logic to the language implementation. Many
distributed applications are reactive. Yet, existing
change propagation algorithms are not suitable in a
distributed setting. We propose Distributed REScala, a
reactive language with a change propagation algorithm
that works without centralized knowledge about the
topology of the dependency structure among reactive
values and avoids unnecessary propagation of changes,
while retaining safety guarantees (glitch freedom).
Distributed REScala enables distributed reactive
programming, bringing the benefits of reactive
programming to distributed applications. We demonstrate
the enabled design improvements by a case study. We
also empirically evaluate the performance of our
algorithm in comparison to other algorithms in a
simulated distributed setting.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Rendel:2014:OAA,
author = "Tillmann Rendel and Jonathan Immanuel Brachth{\"a}user
and Klaus Ostermann",
title = "From object algebras to attribute grammars",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "377--395",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660237",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Oliveira and Cook (2012) and Oliveira et al. (2013)
have recently introduced object algebras as a program
structuring technique to improve the modularity and
extensibility of programs. We analyze the relationship
between object algebras and attribute grammars (AGs), a
formalism to augment context-free grammars with
attributes. We present an extension of the object
algebra technique with which the full class of
L-attributed grammars --- an important class of AGs
that corresponds to one-pass compilers --- can be
encoded in Scala. The encoding is modular (attributes
can be defined and type-checked separately), scalable
(the size of the encoding is linear in the size of the
AG specification) and compositional (each AG artifact
is represented as a semantic object of the host
language). To evaluate these claims, we have formalized
the encoding and re-implemented a one-pass compiler for
a subset of C with our technique. We also discuss how
advanced features of modern AG systems, such as
higher-order and parameterized attributes, reference
attributes, and forwarding can be supported.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Ureche:2014:LDL,
author = "Vlad Ureche and Eugene Burmako and Martin Odersky",
title = "Late data layout: unifying data representation
transformations",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "397--416",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660197",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Values need to be represented differently when
interacting with certain language features. For
example, an integer has to take an object-based
representation when interacting with erased generics,
although, for performance reasons, the stack-based
value representation is better. To abstract over these
implementation details, some programming languages
choose to expose a unified high-level concept (the
integer) and let the compiler choose its exact
representation and insert coercions where necessary.
This pattern appears in multiple language features such
as value classes, specialization and multi-stage
programming: they all expose a unified concept which
they later refine into multiple representations. Yet,
the underlying compiler implementations typically
entangle the core mechanism with assumptions about the
alternative representations and their interaction with
other language features. In this paper we present the
Late Data Layout mechanism, a simple but versatile
type-driven generalization that subsumes and improves
the state-of-the-art representation transformations. In
doing so, we make two key observations: (1) annotated
types conveniently capture the semantics of using
multiple representations and (2) local type inference
can be used to consistently and optimally introduce
coercions. We validated our approach by implementing
three language features as Scala compiler extensions:
value classes, specialization (using the miniboxing
representation) and a simplified multi-stage
programming mechanism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Mitschke:2014:ILI,
author = "Ralf Mitschke and Sebastian Erdweg and Mirko
K{\"o}hler and Mira Mezini and Guido Salvaneschi",
title = "{i3QL}: language-integrated live data views",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "417--432",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660242",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "An incremental computation updates its result based on
a change to its input, which is often an order of
magnitude faster than a recomputation from scratch. In
particular, incrementalization can make expensive
computations feasible for settings that require short
feedback cycles, such as interactive systems, IDEs, or
(soft) real-time systems. This paper presents i3QL, a
general-purpose programming language for specifying
incremental computations. i3QL provides a declarative
SQL-like syntax and is based on incremental versions of
operators from relational algebra, enriched with
support for general recursion. We integrated i3QL into
Scala as a library, which enables programmers to use
regular Scala code for non-incremental subcomputations
of an i3QL query and to easily integrate incremental
computations into larger software projects. To improve
performance, i3QL optimizes user-defined queries by
applying algebraic laws and partial evaluation. We
describe the design and implementation of i3QL and its
optimizations, demonstrate its applicability, and
evaluate its performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Chakrabarti:2014:ALL,
author = "Dhruva R. Chakrabarti and Hans-J. Boehm and Kumud
Bhandari",
title = "{Atlas}: leveraging locks for non-volatile memory
consistency",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "433--452",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660224",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Non-volatile main memory, such as memristors or phase
change memory, can revolutionize the way programs
persist data. In-memory objects can themselves be
persistent without the need for a separate persistent
data storage format. However, the challenge is to
ensure that such data remains consistent if a failure
occurs during execution. In this paper, we present our
system, called Atlas, which adds durability semantics
to lock-based code, typically allowing us to
automatically maintain a globally consistent state even
in the presence of failures. We identify failure-atomic
sections of code based on existing critical sections
and describe a log-based implementation that can be
used to recover a consistent state after a failure. We
discuss several subtle semantic issues and
implementation tradeoffs. We confirm the ability to
rapidly flush CPU caches as a core implementation
bottleneck and suggest partial solutions. Experimental
results confirm the practicality of our approach and
provide insight into the overheads of such a system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Steele:2014:FSP,
author = "Guy L. {Steele, Jr.} and Doug Lea and Christine H.
Flood",
title = "Fast splittable pseudorandom number generators",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "453--472",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660195",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/bibnet/authors/m/marsaglia-george.bib;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/jstatsoft.bib;
https://www.math.utah.edu/pub/tex/bib/mathcw.bib;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/prng.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/tomacs.bib",
abstract = "We describe a new algorithm SplitMix for an
object-oriented and splittable pseudorandom number
generator (PRNG) that is quite fast: 9 64-bit
arithmetic/logical operations per 64 bits generated. A
conventional linear PRNG object provides a generate
method that returns one pseudorandom value and updates
the state of the PRNG, but a splittable PRNG object
also has a second operation, split, that replaces the
original PRNG object with two (seemingly) independent
PRNG objects, by creating and returning a new such
object and updating the state of the original object.
Splittable PRNG objects make it easy to organize the
use of pseudorandom numbers in multithreaded programs
structured using fork-join parallelism. No locking or
synchronization is required (other than the usual
memory fence immediately after object creation).
Because the generate method has no loops or
conditionals, it is suitable for SIMD or GPU
implementation. We derive SplitMix from the DotMix
algorithm of Leiserson, Schardl, and Sukha by making a
series of program transformations and engineering
improvements. The end result is an object-oriented
version of the purely functional API used in the
Haskell library for over a decade, but SplitMix is
faster and produces pseudorandom sequences of higher
quality; it is also far superior in quality and speed
to java.util.Random, and has been included in Java JDK8
as the class java.util.SplittableRandom. We have tested
the pseudorandom sequences produced by SplitMix using
two standard statistical test suites (DieHarder and
TestU01) and they appear to be adequate for
``everyday'' use, such as in Monte Carlo algorithms and
randomized data structures where speed is important.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark-1 = "OOPSLA '14 conference proceedings.",
remark-2 = "On page 466, the authors describe an interesting
technique for improving a user-supplied seed that might
produce insufficient randomness in the next several
members of the random-number sequence: ``Long runs of
0-bits or of 1-bits in the $\gamma$ [candidate seed]
value do not cause bits of the seed to flip; an
approximate proxy for how many bits of the seed will
flip might be the number of bit pairs of the form 01 or
10 in the candidate $\gamma$ value {\tt z}. Therefore
we require that the number of such pairs, as computed
by {\tt Long.bitCount(z ^ (z >>> 1))}, exceed 24; if it
does not, then the candidate z is replaced by the XOR
of {\tt z} and {\tt 0xaaaaaaaaaaaaaaaaL}, a constant
chosen so that (a) the low bit of {\tt z} remains 1,
and (b) every bit pair of the form 00 or 11 becomes
either 01 or 10, and likewise every bit pair of the
form 01 or 10 becomes either 00 or 11, so the new value
necessarily has more than 24 bit pairs whose bits
differ. Testing shows that this trick appears to be
effective.''",
remark-3 = "From page 468: ``we did three runs of TestU01 BigCrush
on {\tt java.util.Random}; 19 tests produced clear
failure on all three runs. These included 9 Birthday
Spacings tests, 8 ClosePairs tests, a WeightDistrib
test, and a CouponCollector test. This confirms
L'Ecuyer's observation that {\tt java.util.Random}
tends to fail Birthday Spacings tests [17].'' The
reference is to \cite{LEcuyer:2001:SUR}.",
remark-4 = "From page 470: ``[L'Ecuyer] comments, `In the Java
class {\tt java.util.Random}, RNG streams can be
declared and constructed dynamically, without limit on
their number. However, no precaution seems to have been
taken regarding the independence of these streams.'''",
remark-5 = "From page 471: ``They [the generators in this paper]
should not be used for cryptographic or security
applications, because they are too predictable (the
mixing functions are easily inverted, and two
successive outputs suffice to reconstruct the internal
state), \ldots{} One version seems especially suitable
for use as a replacement for {\tt java.util.Random},
because it produces sequences of higher quality, is
faster in sequential use, is easily parallelized for
use in JDK8 stream expressions, and is amenable to
efficient implementation on SIMD and GPU
architectures.''",
}
@Article{Samak:2014:MTS,
author = "Malavika Samak and Murali Krishna Ramanathan",
title = "Multithreaded test synthesis for deadlock detection",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "473--489",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660238",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Designing and implementing thread-safe multithreaded
libraries can be a daunting task as developers of these
libraries need to ensure that their implementations are
free from concurrency bugs, including deadlocks. The
usual practice involves employing software testing
and/or dynamic analysis to detect deadlocks. Their
effectiveness is dependent on well-designed
multithreaded test cases. Unsurprisingly, developing
multithreaded tests is significantly harder than
developing sequential tests for obvious reasons. In
this paper, we address the problem of automatically
synthesizing multithreaded tests that can induce
deadlocks. The key insight to our approach is that a
subset of the properties observed when a deadlock
manifests in a concurrent execution can also be
observed in a single threaded execution. We design a
novel, automatic, scalable and directed approach that
identifies these properties and synthesizes a deadlock
revealing multithreaded test. The input to our approach
is the library implementation under consideration and
the output is a set of deadlock revealing multithreaded
tests. We have implemented our approach as part of a
tool, named OMEN. OMEN is able to synthesize
multithreaded tests on many multithreaded Java
libraries. Applying a dynamic deadlock detector on the
execution of the synthesized tests results in the
detection of a number of deadlocks, including 35 real
deadlocks in classes documented as thread-safe.
Moreover, our experimental results show that dynamic
analysis on multithreaded tests that are either
synthesized randomly or developed by third-party
programmers are ineffective in detecting the
deadlocks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Bergan:2014:SEM,
author = "Tom Bergan and Dan Grossman and Luis Ceze",
title = "Symbolic execution of multithreaded programs from
arbitrary program contexts",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "491--506",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660200",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe an algorithm to perform symbolic execution
of a multithreaded program starting from an arbitrary
program context. We argue that this can enable more
efficient symbolic exploration of deep code paths in
multithreaded programs by allowing the symbolic engine
to jump directly to program contexts of interest. The
key challenge is modeling the initial context with
reasonable precision --- an overly approximate model
leads to exploration of many infeasible paths during
symbolic execution, while a very precise model would be
so expensive to compute that computing it would defeat
the purpose of jumping directly to the initial context
in the first place. We propose a context-specific
dataflow analysis that approximates the initial context
cheaply, but precisely enough to avoid some common
causes of infeasible-path explosion. This model is
necessarily approximate --- it may leave portions of
the memory state unconstrained, leaving our symbolic
execution unable to answer simple questions such as
``which thread holds lock A?''. For such cases, we
describe a novel algorithm for evaluating symbolic
synchronization during symbolic execution. Our symbolic
execution semantics are sound and complete up to the
limits of the underlying SMT solver. We describe
initial experiments on an implementation in Cloud9.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Barowy:2014:CDD,
author = "Daniel W. Barowy and Dimitar Gochev and Emery D.
Berger",
title = "{CheckCell}: data debugging for spreadsheets",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "507--523",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660207",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Testing and static analysis can help root out bugs in
programs, but not in data. This paper introduces data
debugging, an approach that combines program analysis
and statistical analysis to automatically find
potential data errors. Since it is impossible to know a
priori whether data are erroneous, data debugging
instead locates data that has a disproportionate impact
on the computation. Such data is either very important,
or wrong. Data debugging is especially useful in the
context of data-intensive programming environments that
intertwine data with programs in the form of queries or
formulas. We present the first data debugging tool,
CheckCell, an add-in for Microsoft Excel. CheckCell
identifies cells that have an unusually high impact on
the spreadsheet's computations. We show that CheckCell
is both analytically and empirically fast and
effective. We show that it successfully finds injected
typographical errors produced by a generative model
trained with data entry from 169,112 Mechanical Turk
tasks. CheckCell is more precise and efficient than
standard outlier detection techniques. CheckCell also
automatically identifies a key flaw in the infamous
Reinhart and Rogoff spreadsheet.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Pavlinovic:2014:FMT,
author = "Zvonimir Pavlinovic and Tim King and Thomas Wies",
title = "Finding minimum type error sources",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "525--542",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660230",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Automatic type inference is a popular feature of
functional programming languages. If a program cannot
be typed, the compiler typically reports a single
program location in its error message. This location is
the point where the type inference failed, but not
necessarily the actual source of the error. Other
potential error sources are not even considered. Hence,
the compiler often misses the true error source, which
increases debugging time for the programmer. In this
paper, we present a general framework for automatic
localization of type errors. Our algorithm finds all
minimum error sources, where the exact definition of
minimum is given in terms of a compiler-specific
ranking criterion. Compilers can use minimum error
sources to produce more meaningful error reports, and
for automatic error correction. Our approach works by
reducing the search for minimum error sources to an
optimization problem that we formulate in terms of
weighted maximum satisfiability modulo theories
(MaxSMT). The reduction to weighted MaxSMT allows us to
build on SMT solvers to support rich type systems and
at the same time abstract from the concrete criterion
that is used for ranking the error sources. We have
implemented an instance of our framework targeted at
Hindley-Milner type systems and evaluated it on
existing OCaml benchmarks for type error localization.
Our evaluation shows that our approach has the
potential to significantly improve the quality of type
error reports produced by state-of-the-art compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Liu:2014:FFL,
author = "Peng Liu and Omer Tripp and Xiangyu Zhang",
title = "{Flint}: fixing linearizability violations",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "543--560",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660217",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Writing concurrent software while achieving both
correctness and efficiency is a grand challenge. To
facilitate this task, concurrent data structures have
been introduced into the standard library of popular
languages like Java and C\#. Unfortunately, while the
operations exposed by concurrent data structures are
atomic (or linearizable), compositions of these
operations are not necessarily atomic. Recent studies
have found many erroneous implementations of composed
concurrent operations. We address the problem of fixing
nonlinearizable composed operations such that they
behave atomically. We introduce Flint, an automated
fixing algorithm for composed Map operations. Flint
accepts as input a composed operation suffering from
atomicity violations. Its output, if fixing succeeds,
is a composed operation that behaves equivalently to
the original operation in sequential runs and is
guaranteed to be atomic. To our knowledge, Flint is the
first general algorithm for fixing incorrect concurrent
compositions. We have evaluated Flint on 48 incorrect
compositions from 27 popular applications, including
Tomcat and MyFaces. The results are highly encouraging:
Flint is able to correct 96\% of the methods, and the
fixed version is often the same as the fix by an expert
programmer and as efficient as the original code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Song:2014:SDR,
author = "Linhai Song and Shan Lu",
title = "Statistical debugging for real-world performance
problems",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "561--578",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660234",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Design and implementation defects that lead to
inefficient computation widely exist in software. These
defects are difficult to avoid and discover. They lead
to severe performance degradation and energy waste
during production runs, and are becoming increasingly
critical with the meager increase of single-core
hardware performance and the increasing concerns about
energy constraints. Effective tools that diagnose
performance problems and point out the inefficiency
root cause are sorely needed. The state of the art of
performance diagnosis is preliminary. Profiling can
identify the functions that consume the most
computation resources, but can neither identify the
ones that waste the most resources nor explain why.
Performance-bug detectors can identify specific types of
inefficient computation, but are not suited for
diagnosing general performance problems. Effective
failure diagnosis techniques, such as statistical
debugging, have been proposed for functional bugs.
However, whether they work for performance problems is
still an open question. In this paper, we first conduct
an empirical study to understand how performance
problems are observed and reported by real-world users.
Our study shows that statistical debugging is a natural
fit for diagnosing performance problems, which are
often observed through comparison-based approaches and
reported together with both good and bad inputs. We
then thoroughly investigate different design points in
statistical debugging, including three different
predicates and two different types of statistical
models, to understand which design point works the best
for performance diagnosis. Finally, we study how some
unique nature of performance bugs allows sampling
techniques to lower the overhead of run-time
performance diagnosis without extending the diagnosis
latency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Parr:2014:ALP,
author = "Terence Parr and Sam Harwell and Kathleen Fisher",
title = "Adaptive {LL(*)} parsing: the power of dynamic
analysis",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "579--598",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660202",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite the advances made by modern parsing strategies
such as PEG, LL (*), GLR, and GLL, parsing is not a
solved problem. Existing approaches suffer from a
number of weaknesses, including difficulties supporting
side-effecting embedded actions, slow and/or
unpredictable performance, and counter-intuitive
matching strategies. This paper introduces the ALL (*)
parsing strategy that combines the simplicity,
efficiency, and predictability of conventional top-down
LL(k) parsers with the power of a GLR-like mechanism to
make parsing decisions. The critical innovation is to
move grammar analysis to parse-time, which lets ALL(*)
handle any non-left-recursive context-free grammar. ALL
(*) is O(n$^4$ ) in theory but consistently performs
linearly on grammars used in practice, outperforming
general strategies such as GLL and GLR by orders of
magnitude. ANTLR 4 generates ALL (*) parsers and
supports direct left-recursion through grammar
rewriting. Widespread ANTLR 4 use (5000 downloads/month
in 2013) provides evidence that ALL (*) is effective
for a wide variety of applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Gligoric:2014:AMB,
author = "Milos Gligoric and Wolfram Schulte and Chandra Prasad
and Danny van Velzen and Iman Narasamdya and Benjamin
Livshits",
title = "Automated migration of build scripts using dynamic
analysis and search-based refactoring",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "599--616",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660239",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The efficiency of a build system is an important
factor for developer productivity. As a result,
developer teams have been increasingly adopting new
build systems that allow higher build parallelization.
However, migrating the existing legacy build scripts to
new build systems is a tedious and error-prone process.
Unfortunately, there is insufficient support for
automated migration of build scripts, making the
migration more problematic. We propose the first
dynamic approach for automated migration of build
scripts to new build systems. Our approach works in two
phases. First, from a set of execution traces, we
synthesize build scripts that accurately capture the
intent of the original build. The synthesized build
scripts are typically long and hard to maintain.
Second, we apply refactorings that raise the
abstraction level of the synthesized scripts (e.g.,
introduce functions for similar fragments). As
different refactoring sequences may lead to different
build scripts, we use a search-based approach that
explores various sequences to identify the best (e.g.,
shortest) build script. We optimize search-based
refactoring with partial-order reduction to explore
refactoring sequences faster. We implemented the
proposed two phase migration approach in a tool called
METAMORPHOSIS that has been recently used at
Microsoft.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Kumar:2014:MCM,
author = "Vineet Kumar and Laurie Hendren",
title = "{MIX10}: compiling {MATLAB} to {X10} for high
performance",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "617--636",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660218",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/matlab.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "MATLAB is a popular dynamic array-based language
commonly used by students, scientists and engineers who
appreciate the interactive development style, the rich
set of array operators, the extensive builtin library,
and the fact that they do not have to declare static
types. Even though these users like to program in
MATLAB, their computations are often very
compute-intensive and are better suited for emerging
high performance computing systems. This paper reports
on MIX10, a source-to-source compiler that
automatically translates MATLAB programs to X10, a
language designed for ``Performance and Productivity at
Scale''; thus, helping scientific programmers make
better use of high performance computing systems. There
is a large semantic gap between the array-based
dynamically-typed nature of MATLAB and the
object-oriented, statically-typed, and high-level array
abstractions of X10. This paper addresses the major
challenges that must be overcome to produce sequential
X10 code that is competitive with state-of-the-art
static compilers for MATLAB which target more
conventional imperative languages such as C and
Fortran. Given that efficient basis, the paper then
provides a translation for the MATLAB parfor construct
that leverages the powerful concurrency constructs in
X10. The MIX10 compiler has been implemented using the
McLab compiler tools, is open source, and is available
both for compiler researchers and end-user MATLAB
programmers. We have used the implementation to perform
many empirical measurements on a set of 17 MATLAB
benchmarks. We show that our best MIX10-generated code
is significantly faster than the de facto Mathworks'
MATLAB system, and that our results are competitive
with state-of-the-art static compilers that target C
and Fortran. We also show the importance of finding the
correct approach to representing the arrays in the
generated X10 code, and the necessity of an
``IntegerOkay'' analysis that determines which double
variables can
be safely represented as integers. Finally, we show
that our X10-based handling of the MATLAB parfor
greatly outperforms the de facto MATLAB
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Jonnalagedda:2014:SPC,
author = "Manohar Jonnalagedda and Thierry Coppey and Sandro
Stucki and Tiark Rompf and Martin Odersky",
title = "Staged parser combinators for efficient data
processing",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "637--653",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660241",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parsers are ubiquitous in computing, and many
applications depend on their performance for decoding
data efficiently. Parser combinators are an intuitive
tool for writing parsers: tight integration with the
host language enables grammar specifications to be
interleaved with processing of parse results.
Unfortunately, parser combinators are typically slow
due to the high overhead of the host language
abstraction mechanisms that enable composition. We
present a technique for eliminating such overhead. We
use staging, a form of runtime code generation, to
dissociate input parsing from parser composition, and
eliminate intermediate data structures and computations
associated with parser composition at staging time. A
key challenge is to maintain support for input
dependent grammars, which have no clear stage
distinction. Our approach applies to top-down
recursive-descent parsers as well as bottom-up
non-deterministic parsers with key applications in
dynamic programming on sequences, where we
auto-generate code for parallel hardware. We achieve
performance comparable to specialized, hand-written
parsers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Rosner:2014:BET,
author = "Nicol{\'a}s Rosner and Valeria Bengolea and Pablo
Ponzio and Shadi Abdul Khalek and Nazareno Aguirre and
Marcelo F. Frias and Sarfraz Khurshid",
title = "Bounded exhaustive test input generation from hybrid
invariants",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "655--674",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660232",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a novel technique for producing bounded
exhaustive test suites from hybrid invariants, i.e.,
invariants that are expressed imperatively,
declaratively, or as a combination of declarative and
imperative predicates. Hybrid specifications are
processed using known mechanisms for the imperative and
declarative parts, but combined in a way that enables
us to exploit information from the declarative side,
such as tight bounds computed from the declarative
specification, to improve the search both on the
imperative and declarative sides. Moreover, our
technique automatically evaluates different possible
ways of processing the imperative side, and the
alternative settings (imperative or declarative) for
parts of the invariant available both declaratively and
imperatively, to decide the most convenient invariant
configuration with respect to efficiency in test
generation. This is achieved by transcoping, i.e., by
assessing the efficiency of the different alternatives
on small scopes (where generation times are
negligible), and then extrapolating the results to
larger scopes. We also show experiments involving
collection classes that support the effectiveness of
our technique, by demonstrating that (i) bounded
exhaustive suites can be computed from hybrid
invariants significantly more efficiently than doing so
using state-of-the-art purely imperative and purely
declarative approaches, and (ii) our technique is able
to automatically determine efficient hybrid invariants,
in the sense that they lead to an efficient computation
of bounded exhaustive suites, using transcoping.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Wang:2014:CVM,
author = "Peng Wang and Santiago Cuellar and Adam Chlipala",
title = "Compiler verification meets cross-language linking via
data abstraction",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "675--690",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660201",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many real programs are written in multiple different
programming languages, and supporting this pattern
creates challenges for formal compiler verification. We
describe our Coq verification of a compiler for a
high-level language, such that the compiler correctness
theorem allows us to derive partial-correctness
Hoare-logic theorems for programs built by linking the
assembly code output by our compiler and assembly code
produced by other means. Our compiler supports such
tricky features as storable cross-language function
pointers, without giving up the usual benefits of being
able to verify different compiler phases (including, in
our case, two classic optimizations) independently. The
key technical innovation is a mixed operational and
axiomatic semantics for the source language, with a
built-in notion of abstract data types, such that
compiled code interfaces with other languages only
through axiomatically specified methods that mutate
encapsulated private data, represented in whatever
formats are most natural for those languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Turon:2014:GNW,
author = "Aaron Turon and Viktor Vafeiadis and Derek Dreyer",
title = "{GPS}: navigating weak memory with ghosts, protocols,
and separation",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "691--707",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660243",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Weak memory models formalize the inconsistent
behaviors that one can expect to observe in
multithreaded programs running on modern hardware. In
so doing, however, they complicate the
already-difficult task of reasoning about correctness
of concurrent code. Worse, they render impotent the
sophisticated formal methods that have been developed
to tame concurrency, which almost universally assume a
strong (i.e., sequentially consistent) memory model.
This paper introduces GPS, the first program logic to
provide a full-fledged suite of modern verification
techniques --- including ghost state, protocols, and
separation logic --- for high-level, structured
reasoning about weak memory. We demonstrate the
effectiveness of GPS by applying it to challenging
examples drawn from the Linux kernel as well as
lock-free data structures. We also define the semantics
of GPS and prove in Coq that it is sound with respect
to the axiomatic C11 weak memory model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Desai:2014:NPA,
author = "Ankush Desai and Pranav Garg and P. Madhusudan",
title = "Natural proofs for asynchronous programs using
almost-synchronous reductions",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "709--725",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660211",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We consider the problem of provably verifying that an
asynchronous message-passing system satisfies its local
assertions. We present a novel reduction scheme for
asynchronous event-driven programs that finds
almost-synchronous invariants --- invariants consisting
of global states where message buffers are close to
empty. The reduction finds almost-synchronous
invariants and simultaneously argues that they cover
all local states. We show that asynchronous programs
often have almost-synchronous invariants and that we
can exploit this to build natural proofs that they are
correct. We implement our reduction strategy, which is
sound and complete, and show that it is more effective
in proving programs correct as well as more efficient
in finding bugs in several programs, compared to
current search strategies which almost always diverge.
The high point of our experiments is that our technique
can prove the Windows Phone USB Driver, written in P
[9], correct for the responsiveness property, which was
hitherto not provable using state-of-the-art
model-checkers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Zhang:2014:AIO,
author = "Wei Zhang and Per Larsen and Stefan Brunthaler and
Michael Franz",
title = "Accelerating iterators in optimizing {AST}
interpreters",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "727--743",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660223",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Generators offer an elegant way to express iterators.
However, their performance has always been their
Achilles heel and has prevented widespread adoption. We
present techniques to efficiently implement and
optimize generators. We have implemented our
optimizations in ZipPy, a modern, lightweight,
AST-interpreter-based Python 3 implementation targeting the
Java virtual machine. Our implementation builds on a
framework that optimizes AST interpreters using
just-in-time compilation. In such a system, it is
crucial that AST optimizations do not prevent
subsequent optimizations. Our system was carefully
designed to avoid this problem. We report an average
speedup of 3.58x for generator-bound programs. As a
result, using generators no longer has downsides and
programmers are free to enjoy their upsides.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Zhao:2014:CSP,
author = "Zhijia Zhao and Bo Wu and Mingzhou Zhou and Yufei Ding
and Jianhua Sun and Xipeng Shen and Youfeng Wu",
title = "Call sequence prediction through probabilistic calling
automata",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "745--762",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660221",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Predicting a sequence of upcoming function calls is
important for optimizing programs written in modern
managed languages (e.g., Java, JavaScript, C\#).
Existing function call predictions are mainly built on
statistical patterns, suitable for predicting a single
call but not a sequence of calls. This paper presents a
new way to enable call sequence prediction, which
exploits program structures through Probabilistic
Calling Automata (PCA), a new program representation
that captures both the inherent ensuing relations among
function calls and the probabilistic nature of
execution paths. It shows that PCA-based prediction
outperforms existing predictions, yielding substantial
speedup when applied to guide Just-In-Time
compilation. By enabling accurate, efficient call
sequence prediction for the first time, PCA-based
predictors open up many new opportunities for dynamic
program optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Zhou:2014:SEM,
author = "Mingzhou Zhou and Xipeng Shen and Yaoqing Gao and
Graham Yiu",
title = "Space-efficient multi-versioning for input-adaptive
feedback-driven program optimizations",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "763--776",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660229",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Function versioning is an approach to addressing
input-sensitivity of program optimizations. A major
side effect of it is notable code size increase, which
has been hindering its broad applications to large code
bases and space-stringent environments. In this paper,
we initiate a systematic exploration into the problem,
providing answers to some fundamental questions: Given
a space constraint, to which function we should apply
versioning? How many versions of a function should we
include in the final executable? Is the optimal
selection feasible to do in polynomial time? This study
proves selecting the best set of versions under a space
constraint is NP-complete and proposes a heuristic
algorithm named CHoGS, which yields near-optimal results
in quadratic time. We implement the algorithm and
conduct experiments through the IBM XL compilers. We
observe significant performance enhancement with only
slight code size increase; the results from CHoGS show
severalfold higher space efficiency than those from
traditional hotness-based methods.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Adams:2014:HVM,
author = "Keith Adams and Jason Evans and Bertrand Maher and
Guilherme Ottoni and Andrew Paroski and Brett Simmers
and Edwin Smith and Owen Yamauchi",
title = "The {HipHop Virtual Machine}",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "777--790",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660199",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "The HipHop Virtual Machine (HHVM) is a JIT compiler
and runtime for PHP. While PHP values are dynamically
typed, real programs often have latent types that are
useful for optimization once discovered. Some types can
be proven through static analysis, but limitations in
the ahead-of-time approach leave some types to be
discovered at run time. And even though many values
have latent types, PHP programs can also contain
polymorphic variables and expressions, which must be
handled without catastrophic slowdown. HHVM discovers
latent types by structuring its JIT around the concept
of a tracelet. A tracelet is approximately a basic
block specialized for a particular set of run-time
types for its input values. Tracelets allow HHVM to
exactly and efficiently learn the types observed by the
program, while using a simple compiler. This paper
shows that this approach enables HHVM to achieve high
levels of performance, without sacrificing
compatibility or interactivity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Nazare:2014:VMA,
author = "Henrique Nazar{\'e} and Izabela Maffra and Willer
Santos and Leonardo Barbosa and Laure Gonnord and
Fernando Magno Quint{\~a}o Pereira",
title = "Validation of memory accesses through symbolic
analyses",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "791--809",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660205",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The C programming language does not prevent
out-of-bounds memory accesses. There exist several
techniques to secure C programs; however, these methods
tend to slow down these programs substantially, because
they populate the binary code with runtime checks. To
deal with this problem, we have designed and tested two
static analyses --- symbolic region and range analysis
--- which we combine to remove the majority of these
guards. In addition to the analyses themselves, we
bring two other contributions. First, we describe live
range splitting strategies that improve the efficiency
and the precision of our analyses. Second, we show
how to deal with integer overflows, a phenomenon that
can compromise the correctness of static algorithms
that validate memory accesses. We validate our claims
by incorporating our findings into AddressSanitizer. We
generate SPEC CINT 2006 code that is 17\% faster and
9\% more energy efficient than the code produced
originally by this tool. Furthermore, our approach is
50\% more effective than Pentagons, a state-of-the-art
analysis to sanitize memory accesses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Partush:2014:ASD,
author = "Nimrod Partush and Eran Yahav",
title = "Abstract semantic differencing via speculative
correlation",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "811--828",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660245",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We address the problem of computing semantic
differences between a program and a patched version of
the program. Our goal is to obtain a precise
characterization of the difference between program
versions, or establish their equivalence. We focus on
infinite-state numerical programs, and use abstract
interpretation to compute an over-approximation of
program differences. Computing differences and
establishing equivalence under abstraction requires
abstracting relationships between variables in the two
programs. Towards that end, we use a correlating
abstract domain to compute a sound approximation of
these relationships which captures semantic difference.
This approximation can be computed over any
interleaving of the two programs. However, the choice
of interleaving can significantly affect precision. We
present a speculative search algorithm that aims to
find an interleaving of the two programs with minimal
abstract semantic difference. This method is unique as
it allows the analysis to dynamically alternate between
several interleavings. We have implemented our approach
and applied it to real-world examples including patches
from Git, GNU Coreutils, as well as a few handpicked
patches from the Linux kernel and the Mozilla Firefox
web browser. Our evaluation shows that we compute
precise approximations of semantic differences, and
report few false differences.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Zhang:2014:ESA,
author = "Qirun Zhang and Xiao Xiao and Charles Zhang and Hao
Yuan and Zhendong Su",
title = "Efficient subcubic alias analysis for {C}",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "829--845",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660213",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Inclusion-based alias analysis for C can be formulated
as a context-free language (CFL) reachability problem.
It is well known that the traditional cubic
CFL-reachability algorithm does not scale well in
practice. We present a highly scalable and efficient
CFL-reachability-based alias analysis for C. The key
novelty of our algorithm is to propagate reachability
information along only original graph edges and bypass
a large portion of summary edges, while the traditional
CFL-reachability algorithm propagates along all summary
edges. We also utilize the Four Russians' Trick --- a
key enabling technique in the subcubic CFL-reachability
algorithm --- in our alias analysis. We have
implemented our subcubic alias analysis and conducted
extensive experiments on widely-used C programs from
the pointer analysis literature. The results
demonstrate that our alias analysis scales extremely
well in practice. In particular, it can analyze the
recent Linux kernel (which consists of 10M SLOC) in
about 30 seconds.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Brutschy:2014:SAI,
author = "Lucas Brutschy and Pietro Ferrara and Peter
M{\"u}ller",
title = "Static analysis for independent app developers",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "847--860",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660219",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mobile app markets have lowered the barrier to market
entry for software producers. As a consequence, an
increasing number of independent app developers offer
their products, and recent platforms such as the MIT
App Inventor and Microsoft's TouchDevelop enable even
lay programmers to develop apps and distribute them in
app markets. A major challenge in this distribution
model is to ensure the quality of apps. Besides the
usual sources of software errors, mobile apps are
susceptible to errors caused by the non-determinism of
an event-based execution model, a volatile environment,
diverse hardware, and others. Many of these errors are
difficult to detect during testing, especially for
independent app developers, who are not supported by
test teams and elaborate test infrastructures. To
address this problem, we propose a static program
analysis that captures the specifics of mobile apps and
is efficient enough to provide feedback during the
development process. Experiments involving 51,456
published TouchDevelop scripts show that our analysis
analyzes 98\% of the scripts in under a minute, and
five seconds on average. Manual inspection of the
analysis results for a selection of the scripts shows
that most of the alarms are real errors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Vora:2014:AEA,
author = "Keval Vora and Sai Charan Koduru and Rajiv Gupta",
title = "{ASPIRE}: exploiting asynchronous parallelism in
iterative algorithms using a relaxed consistency based
{DSM}",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "861--878",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660227",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many vertex-centric graph algorithms can be expressed
using asynchronous parallelism by relaxing certain
read-after-write data dependences and allowing threads
to compute vertex values using stale (i.e., not the
most recent) values of their neighboring vertices. We
observe that on distributed shared memory systems, by
converting synchronous algorithms into their
asynchronous counterparts, algorithms can be made
tolerant to high inter-node communication latency.
However, high inter-node communication latency can lead
to excessive use of stale values causing an increase in
the number of iterations required by the algorithms to
converge. Although by using bounded staleness we can
restrict the slowdown in the rate of convergence, this
also restricts the ability to tolerate communication
latency. In this paper we design a relaxed memory
consistency model and consistency protocol that
simultaneously tolerate communication latency and
minimize the use of stale values. This is achieved via
a coordinated use of best effort refresh policy and
bounded staleness. We demonstrate that for a range of
asynchronous graph algorithms and PDE solvers, on
average, our approach outperforms algorithms based
upon prior relaxed memory models that allow stale
values by at least 2.27x, and the Bulk Synchronous
Parallel (BSP) model by 4.2x. We also show that our approach
frequently outperforms GraphLab, a popular distributed
graph processing framework.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Holt:2014:AAL,
author = "Brandon Holt and Preston Briggs and Luis Ceze and Mark
Oskin",
title = "{Alembic}: automatic locality extraction via
migration",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "879--894",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660194",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Partitioned Global Address Space (PGAS) environments
simplify writing parallel code for clusters because
they make data movement implicit --- dereferencing
global pointers automatically moves data around.
However, it does not free the programmer from needing
to reason about locality --- poor placement of data can
lead to excessive and even unnecessary communication.
For this reason, modern PGAS languages such as X10,
Chapel, and UPC allow programmers to express
data-layout constraints and explicitly move
computation. This places an extra burden on the
programmer, and is less effective for applications with
limited or data-dependent locality (e.g., graph
analytics). This paper proposes Alembic, a new static
analysis that frees programmers from having to manually
move computation to exploit locality in PGAS programs.
It works by determining regions of code that access the
same cluster node, then transforming the code to
migrate parts of the execution to increase the
proportion of accesses to local data. We implement the
analysis and transformation for C++ in LLVM and show
that in irregular application kernels, Alembic can
achieve 82\% of the performance of hand-tuned
communication (for comparison, na{\"\i}ve
compiler-generated communication achieves only 13\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Xiao:2014:CPL,
author = "Tian Xiao and Zhenyu Guo and Hucheng Zhou and Jiaxing
Zhang and Xu Zhao and Chencheng Ye and Xi Wang and Wei
Lin and Wenguang Chen and Lidong Zhou",
title = "{Cybertron}: pushing the limit on {I/O} reduction in
data-parallel programs",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "895--908",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660204",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "I/O reduction has been a major focus in optimizing
data-parallel programs for big-data processing. While
the current state-of-the-art techniques use static
program analysis to reduce I/O, Cybertron proposes a
new direction that incorporates runtime mechanisms to
push the limit further on I/O reduction. In particular,
Cybertron accurately tracks at runtime how data is
used in the computation to filter unused data at finer
granularity dynamically, beyond what current
static-analysis based mechanisms are capable of, and to
facilitate a new mechanism called constraint-based
encoding for more efficient encoding. Cybertron has
been implemented and applied to production
data-parallel programs; our extensive evaluations on
real programs and real data have shown its
effectiveness on I/O reduction over the existing
mechanisms at reasonable CPU cost, and its improvement
on end-to-end performance in various network
environments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Radoi:2014:TIC,
author = "Cosmin Radoi and Stephen J. Fink and Rodric Rabbah and
Manu Sridharan",
title = "Translating imperative code to {MapReduce}",
journal = j-SIGPLAN,
volume = "49",
number = "10",
pages = "909--927",
month = oct,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2714064.2660228",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an approach for automatic translation of
sequential, imperative code into a parallel MapReduce
framework. Automating such a translation is
challenging: imperative updates must be translated into
a functional MapReduce form in a manner that both
preserves semantics and enables parallelism. Our
approach works by first translating the input code into
a functional representation, with loops succinctly
represented by fold operations. Then, guided by rewrite
rules, our system searches a space of equivalent
programs for an effective MapReduce implementation. The
rules include a novel technique for handling irregular
loop-carried dependencies using group-by operations to
enable greater parallelism. We have implemented our
technique in a tool called Mold. It translates
sequential Java code into code targeting the Apache
Spark runtime. We evaluated Mold on several real-world
kernels and found that in most cases Mold generated the
desired MapReduce program, even for codes with complex
indirect updates.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '14 conference proceedings.",
}
@Article{Guyer:2014:UJT,
author = "Samuel Z. Guyer",
title = "Use of the {JVM} at {Twitter}: a bird's eye view",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "1--1",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2619208",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Specialties: 15+ years of virtual machine
implementation experience with special focus on memory
management / garbage collection. Close to 20 years of
C/C++ experience. 15+ years of Java experience. Expert
in concurrent/parallel programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Terei:2014:MHP,
author = "David Terei and Alex Aiken and Jan Vitek",
title = "{$ M^3 $}: high-performance memory management from
off-the-shelf components",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "3--13",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602995",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Real-world garbage collectors in managed languages are
complex. We investigate whether this complexity is
really necessary and show that by having a different
(but wider) interface between the collector and the
developer, we can achieve high performance with
off-the-shelf components for real applications. We
propose to assemble a memory manager out of multiple,
simple collection strategies and to expose the choice
of where to use those strategies in the program to the
developer. We describe and evaluate an instantiation of
our design for C. Our prototype allows developers to
choose on a per-type basis whether data should be
reference counted or reclaimed by a tracing collector.
While neither strategy is optimised, our empirical data
shows that we can achieve performance that is
competitive with hand-tuned C code for real-world
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Clifford:2014:AFB,
author = "Daniel Clifford and Hannes Payer and Michael
Starzinger and Ben L. Titzer",
title = "Allocation folding based on dominance",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "15--24",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602994",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Memory management system performance is of increasing
importance in today's managed languages. Two lingering
sources of overhead are the direct costs of memory
allocations and write barriers. This paper introduces
allocation folding, an optimization technique where
the virtual machine automatically folds multiple memory
allocation operations in optimized code together into a
single, larger allocation group. An allocation group
comprises multiple objects and requires just a single
bounds check in a bump-pointer style allocation, rather
than a check for each individual object. More
importantly, all objects allocated in a single
allocation group are guaranteed to be contiguous after
allocation and thus exist in the same generation, which
makes it possible to statically remove write barriers
for reference stores involving objects in the same
allocation group. Unlike object inlining, object
fusing, and object colocation, allocation folding
requires no special connectivity or ownership relation
between the objects in an allocation group. We present
our analysis algorithm to determine when it is safe to
fold allocations together and discuss our
implementation in V8, an open-source, production
JavaScript virtual machine. We present performance
results for the Octane and Kraken benchmark suites and
show that allocation folding is a strong performance
improvement, even in the presence of some heap
fragmentation. Additionally, we use four hand-selected
benchmarks, JPEGEncoder, NBody, Soft3D, and Textwriter,
where allocation folding has a large impact.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Ratnakar:2014:PPC,
author = "Bollu Ratnakar and Rupesh Nasre",
title = "Push-pull constraint graph for efficient points-to
analysis",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "25--33",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602989",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present techniques for efficient computation of
points-to information for C programs. Pointer analysis
is an important phase in the compilation process. The
computed points-to information and the alias
information is useful for client analyses from varied
domains such as bug finding, data-flow analysis,
identifying security vulnerabilities, and
parallelization, to name a few. Prior research on
pointer analysis has indicated that the main bottleneck
to scalability is the presence of
complex constraints (load p = *q and store *p = q
constraints) in the program. Complex constraints add
edges to the constraint graph in an unpredictable
manner and are responsible for initiating propagation
of large amounts of points-to information across edges.
We identify that the root cause of this issue is the
homogeneous structure of the constraint graph, due to
which existing analyses treat loads and stores in a
uniform manner. To address these issues, we present two
techniques. First, we represent a constraint graph in a
non-homogeneous manner, treat loads and stores in
different ways, and employ a push-pull model for
non-uniform propagation. Second, we propose lazy
propagation which propagates information in the
constraint graph only when necessary. We illustrate the
effectiveness of our techniques using six large
open-source programs and show that they improve the
analysis time over a state-of-the-art BDD-based
analysis by 33\% and over Deep Propagation by 21\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Joisha:2014:STF,
author = "Pramod G. Joisha",
title = "Sticky tries: fast insertions, fast lookups, no
deletions for large key universes",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "35--46",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602998",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the sticky trie, a new variant of the
standard trie data structure that achieves
high-performing atomic insertions and lookups for large
key universes by precluding deletions. It has
applications in several areas, including address
tracking, logging, and garbage collection. By
leveraging features of a modern operating system, we
show how a runtime can exploit the absence of deletions
to realize an efficient sticky-trie implementation. We
report on an evaluation of two representative uses ---
a compelling Bloom-filter alternative and a fast substitute
for a garbage collector's sequential store buffer
(SSB). We demonstrate that a sticky trie, when compared
with perhaps the simplest of Bloom filters,
can be over 43\% faster, scale substantially better
with increasing threads, and yet be free of false
positives. By introducing the concept of an ideal SSB,
we also demonstrate that a sticky trie could be
competitive in performance with a class of SSBs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Brandt:2014:CPG,
author = "Steven R. Brandt and Hari Krishnan and Gokarna Sharma
and Costas Busch",
title = "Concurrent, parallel garbage collection in linear
time",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "47--58",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602990",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a new concurrent garbage
collection algorithm based on two types of reference,
strong and weak, to link the graph of objects. Strong
references connect the roots to all the nodes in the
graph but do not contain cycles. Weak references may,
however, contain cycles. Advantages of this system
include: (1) reduced processing, non-trivial garbage
collection work is only required when the last strong
reference is lost; (2) fewer memory traces to delete
objects, a garbage cycle only needs to be traversed
twice to be deleted; (3) fewer memory traces to retain
objects, since the collector can often prove objects
are reachable without fully tracing support cycles to
which the objects belong; (4) concurrency, it can run
in parallel with a live system without ``stopping the
world''; (5) parallelism, because collection operations in
different parts of the memory can proceed at the same
time. Previous variants of this technique required
exponential cleanup time, but our algorithm is linear
in total time, i.e., any changes in the graph take only
O(N) time steps, where N is the number of edges in the
affected subgraph (e.g. the subgraph whose strong
support is affected by the operations).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Ugawa:2014:ROP,
author = "Tomoharu Ugawa and Richard E. Jones and Carl G.
Ritson",
title = "Reference object processing in on-the-fly garbage
collection",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "59--69",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602991",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most proposals for on-the-fly garbage collection
ignore the question of Java's weak and other reference
types. However, we show that reference types are
heavily used in DaCapo benchmarks. Of the few
collectors that do address this issue, most block
mutators, either globally or individually, while
processing reference types. We introduce a new
framework for processing reference types on-the-fly in
Jikes RVM. Our framework supports both insertion and
deletion write barriers. We have model checked our
algorithm and incorporated it in our new implementation
of the Sapphire on-the-fly collector. Using a deletion
barrier, we process references while mutators are
running in less than three times the time that previous
approaches take while mutators are halted; our overall
execution times are no worse, and often better.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Li:2014:MHD,
author = "Pengcheng Li and Chen Ding and Hao Luo",
title = "Modeling heap data growth using average liveness",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "71--82",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI =          "https://doi.org/10.1145/2775049.2602997",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Most of today's programs make use of a sizable heap to
store dynamic data. To characterize the heap dynamics,
this paper presents a set of metrics to measure the
average amount of data live and dead in a period of
execution. They are collectively called average
liveness. The paper defines these metrics of average
liveness, gives linear-time algorithms for measurement,
and discusses their use in finding the best heap size.
The algorithms are implemented in a Java tracing system
called Elephant Tracks and evaluated using the DaCapo
benchmarks running on the Oracle HotSpot and IBM J9
Java virtual machines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Zakkak:2014:JJM,
author = "Foivos S. Zakkak and Polyvios Pratikakis",
title = "{JDMM}: a {Java} memory model for non-cache-coherent
memory architectures",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "83--92",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602999",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "As the number of cores continuously grows, processor
designers are considering non-coherent memories as more
scalable and energy-efficient alternatives to the
current coherent ones. The Java Memory Model (JMM)
requires that all cores can access the Java heap. It
guarantees sequential consistency for data-race-free
programs and no out-of-thin-air values for
non-data-race-free programs. To implement the Java Memory
Model over non-cache-coherent and distributed
architectures, Java Virtual Machines (JVMs) are most
likely to employ software caching. In this work, (i) we
provide a formalization of the Java Memory Model for
non-cache-coherent and distributed memory
architectures, (ii) prove the adherence of our model
with the Java Memory Model and (iii) evaluate,
regarding its compliance to the Java Memory Model, a
state-of-the-art Java Virtual Machine implementation on
a non-cache-coherent architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Egielski:2014:MAM,
author = "Ian J. Egielski and Jesse Huang and Eddy Z. Zhang",
title = "Massive atomics for massive parallelism on {GPUs}",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "93--103",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602993",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "One important type of parallelism exploited in many
applications is reduction type parallelism. In these
applications, the order of the read-modify-write
updates to one shared data object can be arbitrary, as
long as each read-modify-write update is performed
atomically. The typical way to
parallelize these types of applications is to first let
every individual thread perform local computation and
save the results in thread-private data objects, and
then merge the results from all worker threads in the
reduction stage. All applications that fit into the
MapReduce framework belong to this category. Additionally,
machine learning, data mining, numerical analysis, and
scientific simulation applications may also benefit
from reduction type parallelism. However, the
parallelization scheme via the use of thread-private
data objects may not be viable in massively parallel
GPU applications. Because the number of concurrent
threads is extremely large (at least tens of
thousands), thread-private data object creation may lead to
memory space explosion problems. In this paper, we
propose a novel approach to deal with shared data
object management for reduction type parallelism on
GPUs. Our approach exploits fine-grained parallelism
while at the same time maintaining good
programmability. It is based on the use of intrinsic
hardware atomic instructions. Atomic operations may
appear to be expensive since they cause thread
serialization when multiple threads atomically update
the same memory object at the same time. However, we
discovered that, with appropriate atomic collision
reduction techniques, the atomic implementation can
outperform the non-atomics implementation, even for
benchmarks known to have high performance non-atomics
GPU implementations. At the same time, the use of
atomics can greatly reduce coding complexity, as
neither thread-private object management nor explicit
thread communication (for the shared data objects
protected by atomic operations) is necessary.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Ritson:2014:EGC,
author = "Carl G. Ritson and Tomoharu Ugawa and Richard E.
Jones",
title = "Exploring garbage collection with {Haswell} hardware
transactional memory",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "105--115",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602992",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Intel's latest processor microarchitecture, Haswell,
adds support for a restricted form of transactional
memory to the x86 programming model. We explore how
this can be applied to three garbage collection
scenarios in Jikes RVM: parallel copying, concurrent
copying and bitmap marking. We demonstrate gains of
48--101\% in concurrent copying speed over traditional
synchronisation mechanisms. We also show
how similar but portable performance gains can be
achieved through software transactional memory
techniques. We identify the architectural overhead of
capturing sufficient work for transactional execution
as a major stumbling block to the effective use of
transactions in the other scenarios.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Bacon:2014:PRT,
author = "David F. Bacon and Perry Cheng and Sunil Shukla",
title = "Parallel real-time garbage collection of multiple
heaps in reconfigurable hardware",
journal = j-SIGPLAN,
volume = "49",
number = "11",
pages = "117--127",
month = nov,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775049.2602996",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite rapid increases in memory capacity,
reconfigurable hardware is still programmed in a very
low-level manner, generally without any dynamic
allocation at all. This limits productivity especially
as the larger chips encourage more and more complex
designs to be attempted. Prior work has shown that it
is possible to implement a real-time collector in
hardware and achieve stall-free operation --- but at
the price of severe restrictions on object layouts. We
present the first hardware garbage collector capable of
collecting multiple inter-connected heaps, thereby
allowing a rich set of object types. We show that for a
modest additional cost in logic and memory, we can
support multiple heaps at a clock frequency competitive
with monolithic, fixed-layout heaps. We evaluate the
hardware design by synthesizing it for a Xilinx FPGA
and using co-simulation to measure the run-time
behavior over a set of four benchmarks. Even at high
allocation and mutation rates the collector is able to
sustain stall-free (100\% minimum mutator utilization)
operation with up to 4 inter-connected heaps, while
only requiring between 1.1 and 1.7 times the maximum
live memory of the application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '14 conference proceedings.",
}
@Article{Wu:2014:EHS,
author = "Nicolas Wu and Tom Schrijvers and Ralf Hinze",
title = "Effect handlers in scope",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "1--12",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633358",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Algebraic effect handlers are a powerful means for
describing effectful computations. They provide a
lightweight and orthogonal technique to define and
compose the syntax and semantics of different effects.
The semantics is captured by handlers, which are
functions that transform syntax trees. Unfortunately,
the approach does not support syntax for scoping
constructs, which arise in a number of scenarios. While
handlers can be used to provide a limited form of
scope, we demonstrate that this approach constrains the
possible interactions of effects and rules out some
desired semantics. This paper presents two different
ways to capture scoped constructs in syntax, and shows
how to achieve different semantics by reordering
handlers. The first approach expresses scopes using the
existing algebraic handlers framework, but has some
limitations. The problem is fully solved in the second
approach where we introduce higher-order syntax.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Orchard:2014:EES,
author = "Dominic Orchard and Tomas Petricek",
title = "Embedding effect systems in {Haskell}",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "13--24",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633368",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Monads are now an everyday tool in functional
programming for abstracting and delimiting effects. The
link between monads and effect systems is well-known,
but in their typical use, monads provide a much more
coarse-grained view of effects. Effect systems capture
fine-grained information about the effects, but monads
provide only a binary view: effectful or pure. Recent
theoretical work has unified fine-grained effect
systems with monads using a monad-like structure
indexed by a monoid of effect annotations (called
parametric effect monads). This aligns the power of
monads with the power of effect systems. This paper
leverages recent advances in Haskell's type system (as
provided by GHC) to embed this approach in Haskell,
providing user-programmable effect systems. We explore
a number of practical examples that make Haskell even
better and safer for effectful programming. Along the
way, we relate the examples to other concepts, such as
Haskell's implicit parameters and coeffects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Blanchette:2014:ERN,
author = "Jasmin Christian Blanchette and Lars Hupel and Tobias
Nipkow and Lars Noschinski and Dmitriy Traytel",
title = "Experience report: the next 1100 {Haskell}
programmers",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "25--30",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633359",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We report on our experience teaching a Haskell-based
functional programming course to over 1100 students for
two winter terms. The syllabus was organized around
selected material from various sources. Throughout the
terms, we emphasized correctness through QuickCheck
tests and proofs by induction. The submission
architecture was coupled with automatic testing, giving
students the possibility to correct mistakes before the
deadline. To motivate the students, we complemented the
weekly assignments with an informal competition and
gave away trophies in an award ceremony.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Muranushi:2014:ERT,
author = "Takayuki Muranushi and Richard A. Eisenberg",
title = "Experience report: type-checking polymorphic units for
astrophysics research in {Haskell}",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "31--38",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633362",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many of the bugs in scientific programs have their
roots in mistreatment of physical dimensions, via
erroneous expressions in the quantity calculus. Now
that the type system in the Glasgow Haskell Compiler is
rich enough to support type-level integers and other
promoted datatypes, we can type-check the quantity
calculus in Haskell. In addition to basic
dimension-aware arithmetic and unit conversions, our
units library features an extensible system of
dimensions and units, a notion of dimensions apart from
that of units, and unit polymorphism designed to
describe the laws of physics. We demonstrate the
utility of units by writing an astrophysics research
paper. This work is free of unit concerns because every
quantity expression in the paper is rigorously
type-checked.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
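A stripped-down sketch of the underlying idea, tracking a single
dimension exponent with GHC's type-level naturals (illustrative
only; the paper's units library is far richer, with extensible
dimensions, unit conversion, and unit polymorphism):

{-# LANGUAGE DataKinds, KindSignatures, TypeOperators #-}
module UnitsSketch where

import GHC.TypeLits (Nat, type (+))

-- A quantity carries its length-dimension exponent in its type.
newtype Quantity (len :: Nat) = Q Double
  deriving Show

metre :: Quantity 1
metre = Q 1

-- Addition requires identical dimensions ...
addQ :: Quantity d -> Quantity d -> Quantity d
addQ (Q x) (Q y) = Q (x + y)

-- ... while multiplication adds the exponents: an area has type
-- Quantity 2, and adding it to a length is a compile-time error.
mulQ :: Quantity d1 -> Quantity d2 -> Quantity (d1 + d2)
mulQ (Q x) (Q y) = Q (x * y)

area :: Quantity 2
area = mulQ metre metre
-- rejected by the type checker: addQ area metre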
@Article{Vazou:2014:LER,
author = "Niki Vazou and Eric L. Seidel and Ranjit Jhala",
title = "{LiquidHaskell}: experience with refinement types in
the real world",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "39--51",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633366",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Haskell has many delightful features. Perhaps the one
most beloved by its users is its type system that
allows developers to specify and verify a variety of
program properties at compile time. However, many
properties, typically those that depend on
relationships between program values, are impossible or,
at the very least, cumbersome to encode within the
existing type system. Many such properties can be
verified using a combination of Refinement Types and
external SMT solvers. We describe the refinement type
checker liquidHaskell, which we have used to specify
and verify a variety of properties of over 10,000 lines
of Haskell code from various popular libraries,
including containers, hscolour, bytestring, text,
vector-algorithms and xmonad. First, we present a
high-level overview of liquidHaskell, through a tour of
its features. Second, we present a qualitative
discussion of the kinds of properties that can be
checked --- ranging from generic application
independent criteria like totality and termination, to
application specific concerns like memory safety and
data structure correctness invariants. Finally, we
present a quantitative evaluation of the approach, with
a view towards measuring the efficiency and programmer
effort required for verification, and discuss the
limitations of the approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
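A minimal example of the style of annotation LiquidHaskell verifies
(ours, not one of the paper's case studies; the {-@ ... @-}
annotations are checked against an SMT solver):

module SafeDiv where

-- The refinement on the divisor rules out division by zero
-- at compile time.
{-@ safeDiv :: Int -> {d:Int | d /= 0} -> Int @-}
safeDiv :: Int -> Int -> Int
safeDiv n d = n `div` d

ok :: Int
ok = safeDiv 10 2       -- accepted by the checker

-- bad :: Int
-- bad = safeDiv 10 0   -- rejected: 0 does not satisfy d /= 0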
@Article{Pike:2014:SAE,
author = "Lee Pike",
title = "{SmartCheck}: automatic and efficient counterexample
reduction and generalization",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "53--64",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633365",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "QuickCheck is a powerful library for automatic
test-case generation. Because QuickCheck performs
random testing, some of the counterexamples discovered
are very large. QuickCheck provides an interface for
the user to write shrink functions to attempt to reduce
the size of counterexamples. Hand-written
implementations of shrink can be complex, inefficient,
and consist of significant boilerplate code.
Furthermore, shrinking is only one aspect in debugging:
counterexample generalization is the process of
extrapolating from individual counterexamples to a
class of counterexamples, often requiring a flash of
insight from the programmer. To improve counterexample
reduction and generalization, we introduce SmartCheck.
SmartCheck is a debugging tool that reduces algebraic
data using generic search heuristics to efficiently
find smaller counterexamples. In addition to shrinking,
SmartCheck also automatically generalizes
counterexamples to formulas representing classes of
counterexamples. SmartCheck has been implemented for
Haskell and is freely available.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
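The per-type shrink boilerplate that SmartCheck aims to replace
looks like the following (our example: QuickCheck's Arbitrary class
is real, the Expr type is invented):

import Test.QuickCheck

data Expr = Lit Int | Add Expr Expr | Mul Expr Expr
  deriving Show

instance Arbitrary Expr where
  arbitrary = sized gen
    where
      gen 0 = Lit <$> arbitrary
      gen n = oneof
        [ Lit <$> arbitrary
        , Add <$> gen (n `div` 2) <*> gen (n `div` 2)
        , Mul <$> gen (n `div` 2) <*> gen (n `div` 2)
        ]
  -- Hand-written shrinking: try each subterm, then shrink the
  -- components. SmartCheck derives this search generically and
  -- additionally generalizes the counterexamples it finds.
  shrink (Lit n)   = Lit <$> shrink n
  shrink (Add l r) = [l, r] ++ [Add l' r' | (l', r') <- shrink (l, r)]
  shrink (Mul l r) = [l, r] ++ [Mul l' r' | (l', r') <- shrink (l, r)]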
@Article{Maier:2014:HDS,
author = "Patrick Maier and Robert Stewart and Phil Trinder",
title = "The {HdpH DSLs} for scalable reliable computation",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "65--76",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633363",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The statelessness of functional computations
facilitates both parallelism and fault recovery. Faults
and non-uniform communication topologies are key
challenges for emergent large scale parallel
architectures. We report on HdpH and HdpH-RS, a pair of
Haskell DSLs designed to address these challenges for
irregular task-parallel computations on large
distributed-memory architectures. Both DSLs share an
API combining explicit task placement with
sophisticated work stealing. HdpH focuses on
scalability by making placement and stealing topology
aware whereas HdpH-RS delivers reliability by means of
fault tolerant work stealing. We present operational
semantics for both DSLs and investigate conditions for
semantic equivalence of HdpH and HdpH-RS programs, that
is, conditions under which topology awareness can be
transparently traded for fault tolerance. We detail how
the DSL implementations realise topology awareness and
fault tolerance. We report an initial evaluation of
scalability and fault tolerance on a 256-core cluster
and on up to 32K cores of an HPC platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Okabe:2014:SDW,
author = "Kiwamu Okabe and Takayuki Muranushi",
title = "Systems demonstration: writing {NetBSD} sound drivers
in {Haskell}",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "77--78",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633370",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most strongly typed, functional programming languages
are not equipped with a reentrant garbage collector.
Therefore such languages are not used for operating
systems programming, where the virtues of types are
most desired. We propose the use of Context-Local Heaps
(CLHs) to achieve reentrancy, which also increases the
speed of garbage collection. We have implemented CLHs
in Ajhc, a Haskell compiler derived from jhc, rewritten
some NetBSD sound drivers using Ajhc, and benchmarked
them. The reentrant, faster garbage collection that
CLHs provide opens the path to type-assisted operating
systems programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Ekblad:2014:SCC,
author = "Anton Ekblad and Koen Claessen",
title = "A seamless, client-centric programming model for type
safe web applications",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "79--89",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633367",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a new programming model for web
applications which is (1) seamless; one program and one
language is used to produce code for both client and
server, (2) client-centric; the programmer takes the
viewpoint of the client that runs code on the server
rather than the other way around, (3) functional and
type-safe, and (4) portable; everything is implemented
as a Haskell library that implicitly takes care of all
networking code. Our aim is to improve the painful and
error-prone experience of today's standard development
methods, in which clients and servers are coded in
different languages and communicate with each other
using ad-hoc protocols. We present the design of our
library called Haste.App, an example web application
that uses it, and discuss the implementation and the
compiler technology on which it depends.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Levy:2014:DPM,
author = "Amit A. Levy and David Terei and Deian Stefan and
David Mazi{\'e}res",
title = "Demo proposal: making web applications {-XSafe}",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "91--91",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633373",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Simple is a web framework for Haskell. Simple came out
of our work on Hails, a platform for secure web
applications. For Hails, we needed a flexible web
framework that uses no unsafe language features and can
be used to build apps outside the IO monad. Unlike many
mainstream web frameworks, Simple does not enforce a
particular structure or paradigm. Instead, it simply
provides a set of composable building blocks to help
developers structure and organize their web
applications. We've used Simple to build both
traditional web applications as well as applications
with explicit, strong safety and security guarantees.
In the demonstration, we'll focus on the former ---
introducing the framework and motivating its utility
for traditional web apps --- and show how we can
leverage the LIO information flow control library to
add mandatory security policies to apps.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Stefan:2014:BSS,
author = "Deian Stefan and Amit Levy and Alejandro Russo and
David Mazi{\'e}res",
title = "Building secure systems with {LIO} (demo)",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "93--94",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633371",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "LIO is a decentralized information flow control (DIFC)
system, implemented in Haskell. In this demo proposal,
we give an overview of the LIO library and show how LIO
can be used to build secure systems. In particular, we
show how to specify high-level security policies in the
context of web applications, and describe how LIO
automatically enforces these policies even in the
presence of untrusted code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Eisenberg:2014:PFT,
author = "Richard A. Eisenberg and Jan Stolarek",
title = "Promoting functions to type families in {Haskell}",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "95--106",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633361",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Haskell, as implemented in the Glasgow Haskell
Compiler (GHC), is enriched with many extensions that
support type-level programming, such as promoted
datatypes, kind polymorphism, and type families. Yet,
the expressiveness of the type-level language remains
limited. It is missing many features present at the
term level, including case expressions, anonymous
functions, partially-applied functions, and let
expressions. In this paper, we present an algorithm ---
with a proof of correctness --- to encode these
term-level constructs at the type level. Our approach
is automated and capable of promoting a wide array of
functions to type families. We also highlight and
discuss those term-level features that are not
promotable. In so doing, we offer a critique on GHC's
existing type system, showing what it is already
capable of and where it may want improvement. We
believe that delineating the mismatch between GHC's
term level and its type level is a key step toward
supporting dependently typed programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
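For simple functions the translation can be done by hand; an
illustrative pair (ours, not the paper's generated code) of a
term-level function and its promoted closed type family:

{-# LANGUAGE DataKinds, PolyKinds, TypeFamilies, TypeOperators #-}
module Promote where

import GHC.TypeLits (Nat, type (+))

-- Term level ...
len :: [a] -> Int
len []       = 0
len (_ : xs) = 1 + len xs

-- ... and the same recursion at the type level. Constructs such
-- as lambdas, case, and partial application have no such direct
-- image, which is the gap the paper's algorithm addresses.
type family Len (xs :: [a]) :: Nat where
  Len '[]       = 0
  Len (x ': xs) = 1 + Len xs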
@Article{Morris:2014:SSH,
author = "J. Garrett Morris",
title = "A simple semantics for {Haskell} overloading",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "107--118",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633364",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As originally proposed, type classes provide
overloading and ad-hoc definition, but can still be
understood (and implemented) in terms of strictly
parametric calculi. This is not true of subsequent
extensions of type classes. Functional dependencies and
equality constraints allow the satisfiability of
predicates to refine typing; this means that the
interpretations of equivalent qualified types may not
be interconvertible. Overlapping instances and instance
chains allow predicates to be satisfied without
determining the implementations of their associated
class methods, introducing truly non-parametric
behavior. We propose a new approach to the semantics of
type classes, interpreting polymorphic expressions by
the behavior of each of their ground instances, but
without requiring that those behaviors be
parametrically determined. We argue that this approach
both matches the intuitive meanings of qualified types
and accurately models the behavior of programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
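One of the non-parametric extensions at issue is functional
dependencies; the standard collection-class illustration (not taken
from the paper) shows how satisfiability of a predicate refines
typing:

{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies #-}
{-# LANGUAGE FlexibleInstances #-}
module Overloading where

-- The dependency ce -> e says the collection type determines
-- the element type.
class Collects e ce | ce -> e where
  empty  :: ce
  insert :: e -> ce -> ce

instance Collects a [a] where
  empty  = []
  insert = (:)

-- Fixing ce = String forces e = Char via the dependency, a
-- refinement that strictly parametric calculi cannot express.
singleton :: Char -> String
singleton c = insert c empty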
@Article{Chakravarty:2014:FIC,
author = "Manuel M. T. Chakravarty",
title = "Foreign inline code: systems demonstration",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "119--120",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633372",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
@Article{Adams:2014:ISP,
author = "Michael D. Adams and {\"O}mer S. Agacan",
title = "Indentation-sensitive parsing for {Parsec}",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "121--132",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633369",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Several popular languages including Haskell and Python
use the indentation and layout of code as an essential
part of their syntax. In the past, implementations of
these languages used ad hoc techniques to implement
layout. Recent work has shown that a simple extension
to context-free grammars can replace these ad hoc
techniques and provide both formal foundations and
efficient parsing algorithms for indentation
sensitivity. However, that previous work is limited to
bottom-up, LR($k$) parsing, and many combinator-based
parsing frameworks including Parsec use top-down
algorithms that are outside its scope. This paper
remedies this by showing how to add indentation
sensitivity to parsing frameworks like Parsec. It
explores both the formal semantics of and efficient
algorithms for indentation sensitivity. It derives a
Parsec-based library for indentation-sensitive parsing
and presents benchmarks on a real-world language that
show its efficiency and practicality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
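The core idea, comparing source columns against a reference column,
can be sketched on top of stock Parsec primitives (a toy version,
not the library the paper derives):

module IndentSketch where

import Text.Parsec
import Text.Parsec.String (Parser)

-- Run p only if the current input starts in a column strictly
-- to the right of the reference column.
indented :: Int -> Parser a -> Parser a
indented ref p = do
  col <- sourceColumn <$> getPosition
  if col > ref then p else parserFail "not indented"

-- A block: a header item followed by items indented under it.
block :: Parser a -> Parser [a]
block item = do
  ref <- sourceColumn <$> getPosition
  x   <- item
  xs  <- many (try (spaces *> indented ref item))
  return (x : xs)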
@Article{vanderPloeg:2014:RRR,
author = "Atze van der Ploeg and Oleg Kiselyov",
title = "Reflection without remorse: revealing a hidden
sequence to speed up monadic reflection",
journal = j-SIGPLAN,
volume = "49",
number = "12",
pages = "133--144",
month = dec,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775050.2633360",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A series of list appends or monadic binds for many
monads performs algorithmically worse when
left-associated. Continuation-passing style (CPS) is
well-known to cure this severe dependence of
performance on the association pattern. The advantage
of CPS dwindles or disappears if we have to examine or
modify the intermediate result of a series of appends
or binds, before continuing the series. Such
examination is frequently needed, for example, to
control search in non-determinism monads. We present an
alternative approach that is just as general as CPS but
more robust: it makes series of binds and other such
operations efficient regardless of the association
pattern --- and also provides efficient access to
intermediate results. The key is to represent such a
conceptual sequence as an efficient sequence data
structure. Efficient sequence data structures from the
literature are homogeneous and cannot be applied as
they are in a type-safe way to series of monadic binds.
We generalize them to type aligned sequences and show
how to construct their (assuredly order-preserving)
implementations. We demonstrate that our solution
solves previously undocumented, severe performance
problems in iteratees, LogicT transformers, free monads
and extensible effects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '14 conference proceedings.",
}
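The list-append instance of the problem, and the limitation of the
CPS cure, fit in a few lines (an illustrative sketch; the paper's
type-aligned sequences generalize this to monadic binds):

module LeftAssoc where

-- O(n^2): each (++) retraverses the growing left argument.
leftAssoc :: Int -> [Int]
leftAssoc n = foldl (++) [] [[i] | i <- [1 .. n]]

-- The CPS representation (difference lists): appends become
-- function composition, O(n) overall ...
type DList a = [a] -> [a]

snocD :: DList a -> a -> DList a
snocD f x = f . (x :)

-- ... but inspecting an intermediate result forces the whole
-- structure, which is exactly the access that the paper's
-- efficient sequence representation keeps cheap.
toList :: DList a -> [a]
toList f = f []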
@Article{Rajamani:2015:ART,
author = "Sriram Rajamani",
title = "Automating Repetitive Tasks for the Masses",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "1--2",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2682621",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The programming languages (PL) research community has
traditionally catered to the needs of professional
programmers in the continuously evolving technical
industry. However, there is a new opportunity that
knocks at our door. The recent IT revolution has resulted
in the masses having access to personal computing
devices. More than 99\% of these computer users are
non-programmers and are today limited to being passive
consumers of the software that is made available to
them. Can we empower these users to more effectively
leverage computers for their daily tasks? The
formalisms, techniques, and tools developed in the PL
and the formal methods research communities can play a
pivotal role!",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Mellies:2015:FTR,
author = "Paul-Andr{\'e} Melli{\`e}s and Noam Zeilberger",
title = "Functors are Type Refinement Systems",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "3--16",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676970",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The standard reading of type theory through the lens
of category theory is based on the idea of viewing a
type system as a category of well-typed terms. We
propose a basic revision of this reading: rather than
interpreting type systems as categories, we describe
them as functors from a category of typing derivations
to a category of underlying terms. Then, turning this
around, we explain how in fact any functor gives rise
to a generalized type system, with an abstract notion
of typing judgment, typing derivations and typing
rules. This leads to a purely categorical reformulation
of various natural classes of type systems as natural
classes of functors. The main purpose of this paper is
to describe the general framework (which can also be
seen as providing a categorical analysis of refinement
types), and to present a few applications. As a larger
case study, we revisit Reynolds' paper on ``The Meaning
of Types'' (2000), showing how the paper's main results
may be reconstructed along these lines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Krishnaswami:2015:ILD,
author = "Neelakantan R. Krishnaswami and Pierre Pradic and Nick
Benton",
title = "Integrating Linear and Dependent Types",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "17--30",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676969",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we show how to integrate linear types
with type dependency, by extending the
linear/non-linear calculus of Benton to support type
dependency. Next, we give an application of this
calculus by giving a proof-theoretic account of
imperative programming, which requires extending the
calculus with computationally irrelevant
quantification, proof irrelevance, and a monad of
computations. We show the soundness of our theory by
giving a realizability model in the style of Nuprl,
which permits us to validate not only the beta-laws for
each type, but also the eta-laws. These extensions
permit us to decompose Hoare triples into a collection
of simpler type-theoretic connectives, yielding a rich
equational theory for dependently-typed higher-order
imperative programs. Furthermore, both the type theory
and its model are relatively simple, even when all of
the extensions are considered.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Sojakova:2015:HIT,
author = "Kristina Sojakova",
title = "Higher Inductive Types as Homotopy-Initial Algebras",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "31--42",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676983",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Homotopy Type Theory is a new field of mathematics
based on the recently-discovered correspondence between
Martin-L{\"o}f's constructive type theory and abstract
homotopy theory. We have a powerful interplay between
these disciplines --- we can use geometric intuition to
formulate new concepts in type theory and, conversely,
use type-theoretic machinery to verify and often
simplify existing mathematical proofs. Higher inductive
types form a crucial part of this new system since they
allow us to represent mathematical objects, such as
spheres, tori, pushouts, and quotients, in the type
theory. We investigate a class of higher inductive
types called W-suspensions which generalize
Martin-L{\"o}f's well-founded trees. We show that a
propositional variant of W-suspensions, whose
computational behavior is determined up to a higher
path, is characterized by the universal property of
being a homotopy-initial algebra. As a corollary we get
that W-suspensions in the strict form are
homotopy-initial.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Ngo:2015:RES,
author = "Minh Ngo and Fabio Massacci and Dimiter Milushev and
Frank Piessens",
title = "Runtime Enforcement of Security Policies on Black Box
Reactive Programs",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "43--54",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676978",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Security enforcement mechanisms like execution
monitors are used to make sure that some untrusted
program complies with a policy. Different enforcement
mechanisms have different strengths and weaknesses and
hence it is important to understand the qualities of
various enforcement mechanisms. This paper studies
runtime enforcement mechanisms for reactive programs.
We study the impact of two important constraints that
many practical enforcement mechanisms satisfy: (1) the
enforcement mechanism must handle each input/output
event in finite time and on occurrence of the event (as
opposed to, for instance, Ligatti's edit automata, which
have the power to buffer events for an arbitrary amount
of time), and (2) the enforcement mechanism treats the
untrusted program as a black box: it can monitor and/or
edit the input/output events that the program exhibits
on execution and it can explore alternative executions
of the program by running additional copies of the
program and providing these copies with different inputs.
It cannot inspect the source or machine code of the untrusted
program. Such enforcement mechanisms are important in
practice: they include for instance many execution
monitors, virtual machine monitors, and secure
multi-execution or shadow executions. We establish
upper and lower bounds for the class of policies that
are enforceable by such black box mechanisms, and we
propose a generic enforcement mechanism that works for
a wide range of policies. We also show how our generic
enforcement mechanism can be instantiated to enforce
specific classes of policies, at the same time showing
that many existing enforcement mechanisms are optimized
instances of our construction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Barthe:2015:HOA,
author = "Gilles Barthe and Marco Gaboardi and Emilio Jes{\'u}s
Gallego Arias and Justin Hsu and Aaron Roth and
Pierre-Yves Strub",
title = "Higher-Order Approximate Relational Refinement Types
for Mechanism Design and Differential Privacy",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "55--68",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677000",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mechanism design is the study of algorithm design
where the inputs to the algorithm are controlled by
strategic agents, who must be incentivized to
faithfully report them. Unlike typical programmatic
properties, it is not sufficient for algorithms to
merely satisfy the property; incentive properties are
only useful if the strategic agents also believe this
fact. Verification is an attractive way to convince
agents that the incentive properties actually hold, but
mechanism design poses several unique challenges:
interesting properties can be sophisticated relational
properties of probabilistic computations involving
expected values, and mechanisms may rely on other
probabilistic properties, like differential privacy, to
achieve their goals. We introduce a relational
refinement type system, called HOARe2, for verifying
mechanism design and differential privacy. We show that
HOARe2 is sound w.r.t. a denotational semantics, and
correctly models (epsilon,delta)-differential privacy;
moreover, we show that it subsumes DFuzz, an existing
linear dependent type system for differential privacy.
Finally, we develop an SMT-based implementation of
HOARe2 and use it to verify challenging examples of
mechanism design, including auctions and aggregative
games, and new proposed examples from differential
privacy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Ebadi:2015:DPN,
author = "Hamid Ebadi and David Sands and Gerardo Schneider",
title = "Differential Privacy: Now it's Getting Personal",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "69--81",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677005",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Differential privacy provides a way to get useful
information about sensitive data without revealing much
about any one individual. It enjoys many nice
compositionality properties not shared by other
approaches to privacy, including, in particular,
robustness against side-knowledge. Designing
differentially private mechanisms from scratch can be a
challenging task. One way to make it easier to
construct new differentially private mechanisms is to
design a system which allows more complex mechanisms
(programs) to be built from differentially private
building blocks in a principled way, so that the
resulting programs are guaranteed to be differentially
private by construction. This paper is about a new
accounting principle for building differentially
private programs. It is based on a simple
generalisation of classic differential privacy which we
call Personalised Differential Privacy (PDP). In PDP
each individual has its own personal privacy level. We
describe ProPer, an interactive system for implementing
PDP which maintains a privacy budget for each
individual. When a primitive query is made on data
derived from individuals, the provenance of the
involved records determines how the privacy budget of
an individual is affected: the number of records
derived from Alice determines the multiplier for the
privacy decrease in Alice's budget. This offers some
advantages over previous systems, in particular its
fine-grained character allows better utilisation of the
privacy budget than mechanisms based purely on the
concept of global sensitivity, and it applies naturally
to the case of a live database where new individuals
are added over time. We provide a formal model of the
ProPer approach, prove that it provides personalised
differential privacy, and describe a prototype
implementation based on McSherry's PINQ system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Tang:2015:SBC,
author = "Hao Tang and Xiaoyin Wang and Lingming Zhang and Bing
Xie and Lu Zhang and Hong Mei",
title = "Summary-Based Context-Sensitive Data-Dependence
Analysis in Presence of Callbacks",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "83--95",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676997",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Building a summary for library code is a common
approach to speeding up the analysis of client code. In
presence of callbacks, some reachability relationships
between library nodes cannot be obtained during
library-code summarization. Thus, the library code may
have to be analyzed again during the analysis of the
client code with the library summary. In this paper, we
propose to summarize library code with
tree-adjoining-language (TAL) reachability. Compared
with the summary built with context-free-language (CFL)
reachability, the summary built with TAL reachability
further contains conditional reachability
relationships. The conditional reachability
relationships can lead to much lighter analysis of the
library code during the client code analysis with the
TAL-reachability-based library summary. We also
performed an experimental comparison of
context-sensitive data-dependence analysis with the
TAL-reachability-based library summary and
context-sensitive data-dependence analysis with the
CFL-reachability-based library summary using 15
benchmark subjects. Our experimental results
demonstrate that the former has an 8X speed-up over the
latter on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Chatterjee:2015:FAA,
author = "Krishnendu Chatterjee and Rasmus Ibsen-Jensen and
Andreas Pavlogiannis and Prateesh Goyal",
title = "Faster Algorithms for Algebraic Path Properties in
Recursive State Machines with Constant Treewidth",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "97--109",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676979",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Interprocedural analysis is at the heart of numerous
applications in programming languages, such as alias
analysis, constant propagation, etc. Recursive state
machines (RSMs) are standard models for interprocedural
analysis. We consider a general framework with RSMs
where the transitions are labeled from a semiring, and
path properties are algebraic with semiring operations.
RSMs with algebraic path properties can model
interprocedural dataflow analysis problems, the
shortest path problem, the most probable path problem,
etc. The traditional algorithms for interprocedural
analysis focus on path properties where the starting
point is fixed as the entry point of a specific method.
In this work, we consider possible multiple queries as
required in many applications such as in alias
analysis. The study of multiple queries allows us to
bring in a very important algorithmic distinction
between the resource usage of one-time preprocessing
and that of each individual query. The second
aspect that we consider is that the control flow graphs
for most programs have constant treewidth. Our main
contributions are simple and implementable algorithms
that support multiple queries for algebraic path
properties for RSMs that have constant treewidth. Our
theoretical results show that our algorithms have small
additional one-time preprocessing, but can answer
subsequent queries significantly faster as compared to
the current best-known solutions for several important
problems, such as interprocedural reachability and
shortest path. We provide a prototype implementation
for interprocedural reachability and intraprocedural
shortest path that gives a significant speed-up on
several benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Raychev:2015:PPP,
author = "Veselin Raychev and Martin Vechev and Andreas Krause",
title = "Predicting Program Properties from {``Big Code''}",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "111--124",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677009",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new approach for predicting program
properties from massive codebases (aka ``Big Code'').
Our approach first learns a probabilistic model from
existing data and then uses this model to predict
properties of new, unseen programs. The key idea of our
work is to transform the input program into a
representation which allows us to phrase the problem of
inferring program properties as structured prediction
in machine learning. This formulation enables us to
leverage powerful probabilistic graphical models such
as conditional random fields (CRFs) in order to perform
joint prediction of program properties. As an example
of our approach, we built a scalable prediction engine
called JSNice for solving two kinds of problems in the
context of JavaScript: predicting (syntactic) names of
identifiers and predicting (semantic) type annotations
of variables. Experimentally, JSNice predicts correct
names for 63\% of name identifiers and its type
annotation predictions are correct in 81\% of the
cases. In the first week since its release, JSNice was
used by more than 30,000 developers and in only a few
months has become a popular tool in the JavaScript
developer community. By formulating the problem of
inferring program properties as structured prediction
and showing how to perform both learning and inference
in this context, our work opens up new possibilities
for attacking a wide range of difficult problems in the
context of ``Big Code'' including invariant generation,
decompilation, synthesis and others.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Alur:2015:DDL,
author = "Rajeev Alur and Loris D'Antoni and Mukund
Raghothaman",
title = "{DReX}: a Declarative Language for Efficiently
Evaluating Regular String Transformations",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "125--137",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676981",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "We present DReX, a declarative language that can
express all regular string-to-string transformations,
and can still be efficiently evaluated. The class of
regular string transformations has a robust theoretical
foundation including multiple characterizations,
closure properties, and decidable analysis questions,
and admits a number of string operations such as
insertion, deletion, substring swap, and reversal.
Recent research has led to a characterization of
regular string transformations using a primitive set of
function combinators analogous to the definition of
regular languages using regular expressions. While
these combinators form the basis for the language DReX
proposed in this paper, our main technical focus is on
the complexity of evaluating the output of a DReX
program on a given input string. It turns out that the
natural evaluation algorithm involves dynamic
programming, leading to complexity that is cubic in the
length of the input string. Our main contribution is
identifying a consistency restriction on the use of
combinators in DReX programs, and a single-pass
evaluation algorithm for consistent programs with time
complexity that is linear in the length of the input
string and polynomial in the size of the program. We
show that the consistency restriction does not limit
the expressiveness, and whether a DReX program is
consistent can be checked efficiently. We report on a
prototype implementation, and evaluate it using a
representative set of text processing tasks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Veanes:2015:DPS,
author = "Margus Veanes and Todd Mytkowicz and David Molnar and
Benjamin Livshits",
title = "Data-Parallel String-Manipulating Programs",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "139--152",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677014",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "String-manipulating programs are an important class of
programs with applications in malware detection,
graphics, input sanitization for Web security, and
large-scale HTML processing. This paper extends prior
work on BEK, an expressive domain-specific language for
writing string-manipulating programs, with algorithmic
insights that make BEK both analyzable and
data-parallel. By analyzable we mean that unlike most
general purpose programming languages, many algebraic
properties of a BEK program are decidable (i.e., one
can check whether two programs commute or compute the
inverse of a program). By data-parallel we mean that a
BEK program can compute on arbitrary subsections of its
input in parallel, thus exploiting parallel hardware.
This latter requirement is particularly important for
programs which operate on large data: without data
parallelism, a programmer cannot hide the latency of
reading data from various storage media (i.e., reading
a terabyte of data from a modern hard drive takes about
3 hours). With a data-parallel approach, the system can
split data across multiple disks and thus hide the
latency of reading the data. A BEK program is
expressive: a programmer can use conditionals, switch
statements, and registers --- or local variables --- in
order to implement common string-manipulating programs.
Unfortunately, this expressivity induces data
dependencies, which are an obstacle to parallelism. The
key contribution of this paper is an algorithm which
automatically removes these data dependencies by
mapping a BEK program into an intermediate format
consisting of symbolic transducers, which extend
classical transducers with symbolic predicates and
symbolic assignments. We present a novel algorithm that
we call exploration which performs symbolic loop
unrolling of these transducers to obtain simplified
versions of the original program. We show how these
simplified versions can then be lifted to a stateless
form, and from there compiled to data-parallel
hardware. To evaluate the efficacy of our approach, we
demonstrate up to 8x speedups for a number of
real-world, BEK programs, (e.g., HTML encoder and
decoder) on data-parallel hardware. To the best of our
knowledge, these are the first data-parallel
implementations of these programs. To validate that our
approach is correct, we use an automatic testing
technique to compare our generated code to the original
implementations and find no semantic deviations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Chlipala:2015:UWS,
author = "Adam Chlipala",
title = "{Ur\slash Web}: a Simple Model for Programming the
{Web}",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "153--165",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677004",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The World Wide Web has evolved gradually from a
document delivery platform to an architecture for
distributed programming. This largely unplanned
evolution is apparent in the set of interconnected
languages and protocols that any Web application must
manage. This paper presents Ur/Web, a domain-specific,
statically typed functional programming language with a
much simpler model for programming modern Web
applications. Ur/Web's model is unified, where programs
in a single programming language are compiled to other
``Web standards'' languages as needed; supports novel
kinds of encapsulation of Web-specific state; and
exposes simple concurrency, where programmers can
reason about distributed, multithreaded applications
via a mix of transactions and cooperative preemption.
We give a tutorial introduction to the main features of
Ur/Web and discuss the language implementation and the
production Web applications that use it.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Rastogi:2015:SEG,
author = "Aseem Rastogi and Nikhil Swamy and C{\'e}dric Fournet
and Gavin Bierman and Panagiotis Vekris",
title = "Safe \& Efficient Gradual Typing for {TypeScript}",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "167--180",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676971",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Current proposals for adding gradual typing to
JavaScript, such as Closure, TypeScript and Dart, forgo
soundness to deal with issues of scale, code reuse, and
popular programming patterns. We show how to address
these issues in practice while retaining soundness. We
design and implement a new gradual type system,
prototyped for expediency as a 'Safe' compilation mode
for TypeScript. Our compiler achieves soundness by
enforcing stricter static checks and embedding residual
runtime checks in compiled code. It emits plain
JavaScript that runs on stock virtual machines. Our
main theorem is a simulation that ensures that the
checks introduced by Safe TypeScript (1) catch any
dynamic type error, and (2) do not alter the semantics
of type-safe TypeScript code. Safe TypeScript is
carefully designed to minimize the performance overhead
of runtime checks. At its core, we rely on two new
ideas: differential subtyping, a new form of coercive
subtyping that computes the minimum amount of runtime
type information that must be added to each object; and
an erasure modality, which we use to safely and
selectively erase type information. This allows us to
scale our design to full-fledged TypeScript, including
arrays, maps, classes, inheritance, overloading, and
generic types. We validate the usability and
performance of Safe TypeScript empirically by
type-checking and compiling around 120,000 lines of
existing TypeScript source code. Although runtime
checks can be expensive, the end-to-end overhead is
small for code bases that already have type
annotations. For instance, we bootstrap the Safe
TypeScript compiler (90,000 lines including the base
TypeScript compiler): we measure a 15\% runtime
overhead for type safety, and also uncover programming
errors as type safety violations. We conclude that, at
least during development and testing, subjecting
JavaScript/TypeScript programs to safe gradual typing
adds significant value to source type annotations at a
modest cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Greenberg:2015:SEM,
author = "Michael Greenberg",
title = "Space-Efficient Manifest Contracts",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "181--194",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676967",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The standard algorithm for higher-order contract
checking can lead to unbounded space consumption and
can destroy tail recursion, altering a program's
asymptotic space complexity. While space efficiency for
gradual types---contracts mediating untyped and typed
code---is well studied, sound space efficiency for
manifest contracts---contracts that check stronger
properties than simple types, e.g., ``is a natural''
instead of ``is an integer''---remains an open problem.
We show how to achieve sound space efficiency for
manifest contracts with strong predicate contracts. The
essential trick is breaking the contract checking down
into coercions: structured, blame-annotated lists of
checks. By carefully preventing duplicate coercions
from appearing, we can restore space efficiency while
keeping the same observable behavior.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Sekiyama:2015:MCD,
author = "Taro Sekiyama and Yuki Nishida and Atsushi Igarashi",
title = "Manifest Contracts for Datatypes",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "195--207",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676996",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study algebraic data types in a manifest contract
system, a software contract system where contract
information occurs as refinement types. We first
compare two simple approaches: refinements on type
constructors and refinements on data constructors. For
example, lists of positive integers can be described by
{l:int list | for_all (lambda y. y > 0) l} in the
former, whereas by a user-defined datatype pos_list
with cons of type {x:int | x > 0} X pos_list->pos_list
in the latter. The two approaches are complementary:
the former makes it easier for a programmer to write
types and the latter enables more efficient contract
checking. To take the best of both worlds, we propose
(1) a syntactic translation from refinements on type
constructors to equivalent refinements on data
constructors and (2) dynamically checked casts between
different but compatible datatypes such as int list and
pos_list. We define a manifest contract calculus to
formalize the semantics of the casts and prove that the
translation is correct.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
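The two styles can be approximated in plain Haskell with dynamic
checks (a loose illustration only; the paper works in a typed
manifest contract calculus with blame):

module PosList where

-- Data-constructor-style refinement: the check runs once per cons.
newtype PosList = PosList [Int]
  deriving Show

consPos :: Int -> PosList -> PosList
consPos x (PosList xs)
  | x > 0     = PosList (x : xs)
  | otherwise = error "contract violation: expected a positive Int"

-- A cast from the type-constructor style (a predicate over a
-- plain int list) into the compatible refined datatype.
castPos :: [Int] -> PosList
castPos = foldr consPos (PosList [])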
@Article{Vafeiadis:2015:CCO,
author = "Viktor Vafeiadis and Thibaut Balabonski and Soham
Chakraborty and Robin Morisset and Francesco Zappa
Nardelli",
title = "Common Compiler Optimisations are Invalid in the {C11}
Memory Model and what we can do about it",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "209--220",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676995",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We show that the weak memory model introduced by the
2011 C and C++ standards does not permit many common
source-to-source program transformations (such as
expression linearisation and ``roach motel''
reorderings) that modern compilers perform and that are
deemed to be correct. As such it cannot be used to
define the semantics of intermediate languages of
compilers, as, for instance, LLVM aimed to. We consider
a number of possible local fixes, some strengthening
and some weakening the model. We evaluate the proposed
fixes by determining which program transformations are
valid with respect to each of the patched models. We
provide formal Coq proofs of their correctness or
counterexamples as appropriate.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Lange:2015:CMG,
author = "Julien Lange and Emilio Tuosto and Nobuko Yoshida",
title = "From Communicating Machines to Graphical
Choreographies",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "221--232",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676964",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graphical choreographies, or global graphs, are
general multiparty session specifications featuring
expressive constructs such as forking, merging, and
joining for representing application-level protocols.
Global graphs can be directly translated into modelling
notations such as BPMN and UML. This paper presents an
algorithm whereby a global graph can be constructed
from asynchronous interactions represented by
communicating finite-state machines (CFSMs). Our
results include: a sound and complete characterisation
of a subset of safe CFSMs from which global graphs can
be constructed; an algorithm to translate CFSMs to
global graphs; a time complexity analysis; and an
implementation of our theory, as well as an
experimental evaluation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Dodds:2015:SCT,
author = "Mike Dodds and Andreas Haas and Christoph M. Kirsch",
title = "A Scalable, Correct Time-Stamped Stack",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "233--246",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676963",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent data-structures, such as stacks, queues,
and deques, often implicitly enforce a total order over
elements in their underlying memory layout. However,
much of this order is unnecessary: linearizability only
requires that elements are ordered if the insert
methods ran in sequence. We propose a new approach
which uses timestamping to avoid unnecessary ordering.
Pairs of elements can be left unordered if their
associated insert operations ran concurrently, and
order imposed as necessary at the eventual removal. We
realise our approach in a new non-blocking
data-structure, the TS (timestamped) stack. Using the
same approach, we can define corresponding queue and
deque data-structures. In experiments on x86, the TS
stack outperforms and outscales all its competitors ---
for example, it outperforms the elimination-backoff
stack by a factor of two. In our approach, more
concurrency translates into less ordering, giving
less-contended removal and thus higher performance and
scalability. Despite this, the TS stack is linearizable
with respect to stack semantics. The weak internal
ordering in the TS stack presents a challenge when
establishing linearizability: standard techniques such
as linearization points work well when there exists a
total internal order. We present a new stack theorem,
mechanised in Isabelle, which characterises the
orderings sufficient to establish stack semantics. By
applying our stack theorem, we show that the TS stack
is indeed linearizable. Our theorem constitutes a new,
generic proof technique for concurrent stacks, and it
paves the way for future weakly ordered data-structure
designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Jourdan:2015:FVC,
author = "Jacques-Henri Jourdan and Vincent Laporte and Sandrine
Blazy and Xavier Leroy and David Pichardie",
title = "A Formally-Verified {C} Static Analyzer",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "247--259",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676966",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper reports on the design and soundness proof,
using the Coq proof assistant, of Verasco, a static
analyzer based on abstract interpretation for most of
the ISO C 1999 language (excluding recursion and
dynamic allocation). Verasco establishes the absence of
run-time errors in the analyzed programs. It enjoys a
modular architecture that supports the extensible
combination of multiple abstract domains, both
relational and non-relational. Verasco integrates with
the CompCert formally-verified C compiler so that not
only the soundness of the analysis results is
guaranteed with mathematical certitude, but also the
fact that these guarantees carry over to the compiled
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Giacobazzi:2015:APA,
author = "Roberto Giacobazzi and Francesco Logozzo and Francesco
Ranzato",
title = "Analyzing Program Analyses",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "261--273",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676987",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We want to prove that a static analysis of a given
program is complete, namely, no imprecision arises when
asking some query on the program behavior in the
concrete (i.e., for its concrete semantics) or in the
abstract (i.e., for its abstract interpretation).
Completeness proofs are therefore useful to assign
confidence to alarms raised by static analyses. We
introduce the completeness class of an abstraction as
the set of all programs for which the abstraction is
complete. Our first result shows that for any
nontrivial abstraction, its completeness class is not
recursively enumerable. We then introduce a stratified
deductive system to prove the completeness of program
analyses over an abstract domain A. We prove the
soundness of the deductive system. We observe that the
only sources of incompleteness are assignments and
Boolean tests --- unlike a common belief in static
analysis, joins do not induce incompleteness. The first
layer of this proof system is generic,
abstraction-agnostic, and it deals with the standard
constructs for program composition, that is, sequential
composition, branching and guarded iteration. The
second layer is instead abstraction-specific: the
designer of an abstract domain A provides conditions
for completeness in A of assignments and Boolean tests
which have to be checked by a suitable static analysis
or assumed in the completeness proof as hypotheses. We
instantiate the second layer of this proof system first
with a generic nonrelational abstraction in order to
provide a sound rule for the completeness of
assignments. Orthogonally, we instantiate it to the
numerical abstract domains of Intervals and Octagons,
providing necessary and sufficient conditions for the
completeness of their Boolean tests and of assignments
for Octagons.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Stewart:2015:CC,
author = "Gordon Stewart and Lennart Beringer and Santiago
Cuellar and Andrew W. Appel",
title = "Compositional {CompCert}",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "275--287",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676985",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper reports on the development of Compositional
CompCert, the first verified separate compiler for C.
Specifying and proving separate compilation for C is
made challenging by the coincidence of: compiler
optimizations, such as register spilling, that
introduce compiler-managed (private) memory regions
into function stack frames, and C's stack-allocated
addressable local variables, which may leak portions of
stack frames to other modules when their addresses are
passed as arguments to external function calls. The
CompCert compiler, as built/proved by Leroy et al.
2006--2014, has proofs of correctness for whole
programs, but its simulation relations are too weak to
specify or prove separately compiled modules. Our
technical contributions that make Compositional
CompCert possible include: language-independent
linking, a new operational model of multilanguage
linking that supports strong semantic contextual
equivalences; and structured simulations, a refinement
of Beringer et al.'s logical simulation relations that
enables expressive module-local invariants on the state
communicated between compilation units at runtime. All
the results in the paper have been formalized in Coq
and are available for download together with the
Compositional CompCert compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Castagna:2015:PFS,
author = "Giuseppe Castagna and Kim Nguyen and Zhiwu Xu and
Pietro Abate",
title = "Polymorphic Functions with Set-Theoretic Types: {Part
2}: Local Type Inference and Type Reconstruction",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "289--302",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676991",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This article is the second part of a two articles
series about the definition of higher order polymorphic
functions in a type system with recursive types and
set-theoretic type connectives (unions, intersections,
and negations). In the first part, presented in a
companion paper, we defined and studied the syntax,
semantics, and evaluation of the explicitly-typed
version of a calculus, in which type instantiation is
driven by explicit instantiation annotations. In this
second part we present a local type inference system
that allows the programmer to omit explicit
instantiation annotations for function applications,
and a type reconstruction system that allows the
programmer to omit explicit type annotations for
function definitions. The work presented in the two
articles provides the theoretical foundations and
technical machinery needed to design and implement
higher-order polymorphic functional languages with
union and intersection types and/or for semi-structured
data processing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Garcia:2015:PTS,
author = "Ronald Garcia and Matteo Cimini",
title = "Principal Type Schemes for Gradual Programs",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "303--315",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676992",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Gradual typing is a discipline for integrating dynamic
checking into a static type system. Since its
introduction in functional languages, it has been
adapted to a variety of type systems, including
object-oriented, security, and substructural. This work
studies its application to implicitly typed languages
based on type inference. Siek and Vachharajani designed
a gradual type inference system and algorithm that
infers gradual types but still rejects ill-typed static
programs. However, the type system requires local
reasoning about type substitutions, an imperative
inference algorithm, and a subtle correctness
statement. This paper introduces a new approach to
gradual type inference, driven by the principle that
gradual inference should only produce static types. We
present a static implicitly typed language, its gradual
counterpart, and a type inference procedure. The
gradual system types the same programs as Siek and
Vachharajani, but has a modular structure amenable to
extension. The language admits let-polymorphism, and
its dynamics are defined by translation to the
Polymorphic Blame Calculus. The principal types
produced by our initial type system mask the
distinction between static parametric polymorphism and
polymorphism that can be attributed to gradual typing.
To expose this difference, we distinguish static type
parameters from gradual type parameters and reinterpret
gradual type consistency accordingly. The resulting
extension enables programs to be interpreted using
either the polymorphic or monomorphic Blame Calculi.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Lourenco:2015:DIF,
author = "Lu{\'\i}sa Louren{\c{c}}o and Lu{\'\i}s Caires",
title = "Dependent Information Flow Types",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "317--328",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676994",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we develop a novel notion of dependent
information flow types. Dependent information flow
types fit within the standard framework of dependent
type theory, but, unlike usual dependent types,
crucially allow the security level of a type, rather
than just the structural data type itself, to depend on
runtime values. Our dependent function and dependent
sum information flow types provide a direct, natural
and elegant way to express and enforce fine-grained
security policies on programs, including programs that
manipulate structured data types in which the security
level of a structure field may depend on values
dynamically stored in other fields, still considered a
challenge to security enforcement in software systems
such as data-centric web-based applications. We base
our development on the very general setting of a
minimal lambda-calculus with references and
collections. We illustrate its expressiveness, showing
how secure operations on relevant scenarios can be
modelled and analysed using our dependent information
flow type system, which is also shown to be amenable to
algorithmic type checking. Our main results include
type-safety and non-interference theorems ensuring that
well-typed programs do not violate prescribed security
policies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Preda:2015:ASA,
author = "Mila Dalla Preda and Roberto Giacobazzi and Arun
Lakhotia and Isabella Mastroeni",
title = "Abstract Symbolic Automata: Mixed syntactic\slash
semantic similarity analysis of executables",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "329--341",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676986",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a model for mixed syntactic/semantic
approximation of programs based on symbolic finite
automata (SFA). The edges of SFA are labeled by
predicates whose semantics specifies the denotations
that are allowed by the edge. We introduce the notion
of abstract symbolic finite automaton (ASFA) where
approximation is made by abstract interpretation of
symbolic finite automata, acting both at syntactic
(predicate) and semantic (denotation) level. We
investigate in detail how the syntactic and
semantic abstractions of SFA relate to each other and
contribute to the determination of the recognized
language. Then we introduce a family of transformations
for simplifying ASFA. We apply this model to prove
properties of commonly used tools for similarity
analysis of binary executables. Following the structure
of their control flow graphs, disassembled binary
executables are represented as (concrete) SFA, where
states are program points and predicates represent the
(possibly infinite) I/O semantics of each basic block
in a constraint form. Known tools for binary code
analysis are viewed as specific choices of symbolic and
semantic abstractions in our framework, making symbolic
finite automata and their abstract interpretations a
unifying model for comparing and reasoning about
soundness and completeness of analyses of low-level
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Foster:2015:CDP,
author = "Nate Foster and Dexter Kozen and Matthew Milano and
Alexandra Silva and Laure Thompson",
title = "A Coalgebraic Decision Procedure for {NetKAT}",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "343--355",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677011",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "NetKAT is a domain-specific language and logic for
specifying and verifying network packet-processing
functions. It consists of Kleene algebra with tests
(KAT) augmented with primitives for testing and
modifying packet headers and encoding network
topologies. Previous work developed the design of the
language and its standard semantics, proved the
soundness and completeness of the logic, defined a
PSPACE algorithm for deciding equivalence, and
presented several practical applications. This paper
develops the coalgebraic theory of NetKAT, including a
specialized version of the Brzozowski derivative, and
presents a new efficient algorithm for deciding the
equational theory using bisimulation. The coalgebraic
structure admits an efficient sparse representation
that results in a significant reduction in the size of
the state space. We discuss the details of our
implementation and optimizations that exploit NetKAT's
equational axioms and coalgebraic structure to yield
significantly improved performance. We present results
from experiments demonstrating that our tool is
competitive with state-of-the-art tools on several
benchmarks including all-pairs connectivity,
loop-freedom, and translation validation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Pous:2015:SAL,
author = "Damien Pous",
title = "Symbolic Algorithms for Language Equivalence and
{Kleene} Algebra with Tests",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "357--368",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677007",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose algorithms for checking language
equivalence of finite automata over a large alphabet.
We use symbolic automata, where the transition function
is compactly represented using (multi-terminal) binary
decision diagrams (BDD). The key idea consists in
computing a bisimulation by exploring reachable pairs
symbolically, so as to avoid redundancies. This idea
can be combined with already existing optimisations,
and we show in particular a nice integration with the
disjoint sets forest data-structure from Hopcroft and
Karp's standard algorithm. Then we consider Kleene
algebra with tests (KAT), an algebraic theory that can
be used for verification in various domains ranging
from compiler optimisation to network programming
analysis. This theory is decidable by reduction to
language equivalence of automata on guarded strings, a
particular kind of automata that have exponentially
large alphabets. We propose several methods for
constructing symbolic automata out of KAT expressions,
based either on Brzozowski's derivatives or on standard
automata constructions. All in all, this results in
efficient algorithms for deciding equivalence of KAT
expressions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Sjoberg:2015:PC,
author = "Vilhelm Sj{\"o}berg and Stephanie Weirich",
title = "Programming up to Congruence",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "369--382",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676974",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents the design of Zombie, a
dependently-typed programming language that uses an
adaptation of a congruence closure algorithm for proof
and type inference. This algorithm allows the type
checker to automatically use equality assumptions from
the context when reasoning about equality. Most
dependently-typed languages automatically use
equalities that follow from beta-reduction during type
checking; however, such reasoning is incompatible with
congruence closure. In contrast, Zombie does not use
automatic beta-reduction because types may contain
potentially diverging terms. Therefore Zombie provides
a unique opportunity to explore an alternative
definition of equivalence in dependently-typed language
design. Our work includes the specification of the
language via a bidirectional type system, which works
`up-to-congruence,' and an algorithm for elaborating
expressions in this language to an explicitly typed
core language. We prove that our elaboration algorithm
is complete with respect to the source type system, and
always produces well-typed terms in the core language.
This algorithm has been implemented in the Zombie
language, which includes general recursion, irrelevant
arguments, heterogeneous equality and datatypes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Tobisawa:2015:MLC,
author = "Kazunori Tobisawa",
title = "A Meta Lambda Calculus with Cross-Level Computation",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "383--393",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676976",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose meta lambda calculus Lambda-* as a basic
model of textual substitution via metavariables. The
most important feature of the calculus is that every
beta-redex can be reduced regardless of whether the
beta-redex contains meta-level variables or not. Such a
meta lambda calculus has never been achieved before due
to the difficulty of managing binding structure consistently
with alpha-renaming in the presence of meta-level
variables. We overcome the difficulty by introducing a
new mechanism to deal with substitution and binding
structure in a systematic way without the notion of
free variables and alpha-renaming. Calculus Lambda-*
enables us to investigate cross-level terms that
include a certain type of level mismatch. Cross-level
terms have been regarded as meaningless terms and left
out of consideration thus far. We find that some
cross-level terms behave like the quote and `eval' commands
in programming languages. With these terms, we show a
procedural language as an application of the calculus,
which sheds new light on the notions of stores and
recursion via meta-level variables.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Staton:2015:AEL,
author = "Sam Staton",
title = "Algebraic Effects, Linearity, and Quantum Programming
Languages",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "395--406",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676999",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We develop a new framework of algebraic theories with
linear parameters, and use it to analyze the equational
reasoning principles of quantum computing and quantum
programming languages. We use the framework as follows:
we present a new elementary algebraic theory of quantum
computation, built from unitary gates and measurement;
we provide a completeness theorem for the elementary
algebraic theory by relating it with a model from
operator algebra; we extract an equational theory for a
quantum programming language from the algebraic theory;
we compare quantum computation with other local notions
of computation by investigating variations on the
algebraic theory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Farzan:2015:PSU,
author = "Azadeh Farzan and Zachary Kincaid and Andreas
Podelski",
title = "Proof Spaces for Unbounded Parallelism",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "407--420",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677012",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we present a new approach to
automatically verify multi-threaded programs which are
executed by an unbounded number of threads running in
parallel. The starting point for our work is the
problem of how we can leverage existing automated
verification technology for sequential programs
(abstract interpretation, Craig interpolation,
constraint solving, etc.) for multi-threaded programs.
Suppose that we are given a correctness proof for a
trace of a program (or for some other program
fragment). We observe that the proof can always be
decomposed into a finite set of Hoare triples, and we
ask what can be proved from the finite set of Hoare
triples using only simple combinatorial inference rules
(without access to a theorem prover and without the
possibility to infer genuinely new Hoare triples)? We
introduce a proof system where one proves the
correctness of a multi-threaded program by showing that
for each trace of the program, there exists a
correctness proof in the space of proofs that are
derivable from a finite set of axioms using simple
combinatorial inference rules. This proof system is
complete with respect to the classical proof method of
establishing an inductive invariant (which uses thread
quantification and control predicates). Moreover, it is
possible to algorithmically check whether a given set
of axioms is sufficient to prove the correctness of a
multi-threaded program, using ideas from
well-structured transition systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Sangiorgi:2015:ECU,
author = "Davide Sangiorgi",
title = "Equations, Contractions, and Unique Solutions",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "421--432",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676965",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "One of the most studied behavioural equivalences is
bisimilarity. Its success is largely due to the associated
bisimulation proof method, which can be further
enhanced by means of ``up-to bisimulation'' techniques
such as ``up-to context''. A different proof method is
discussed, based on unique solution of special forms of
inequations called contractions, and inspired by
Milner's theorem on unique solution of equations. The
method is as powerful as the bisimulation proof method
and its ``up-to context'' enhancements. The definition
of contraction can be transferred onto other
behavioural equivalences, possibly contextual and
noncoinductive. This enables a coinductive reasoning
style on such equivalences, either by applying the
method based on unique solution of contractions, or by
injecting appropriate contraction preorders into the
bisimulation game. The techniques are illustrated on
CCS-like languages; an example dealing with
higher-order languages is also shown.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Gupta:2015:SRC,
author = "Ashutosh Gupta and Thomas A. Henzinger and Arjun
Radhakrishna and Roopsha Samanta and Thorsten Tarrach",
title = "Succinct Representation of Concurrent Trace Sets",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "433--444",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677008",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a method and a tool for generating succinct
representations of sets of concurrent traces. We focus
on trace sets that contain all correct or all incorrect
permutations of events from a given trace. We represent
trace sets as HB-Formulas that are Boolean combinations
of happens-before constraints between events. To
generate a representation of incorrect interleavings,
our method iteratively explores interleavings that
violate the specification and gathers generalizations
of the discovered interleavings into an HB-Formula; its
complement yields a representation of correct
interleavings. We claim that our trace set
representations can drive diverse verification, fault
localization, repair, and synthesis techniques for
concurrent programs. We demonstrate this by using our
tool in three case studies involving synchronization
synthesis, bug summarization, and abstraction
refinement-based verification. In each case study, our
initial experimental results have been promising. In
the first case study, we present an algorithm for
inferring missing synchronization from an HB-Formula
representing correct interleavings of a given trace.
The algorithm applies rules to rewrite specific
patterns in the HB-Formula into locks, barriers, and
wait-notify constructs. In the second case study, we
use an HB-Formula representing incorrect interleavings
for bug summarization. While the HB-Formula itself is a
concise counterexample summary, we present additional
inference rules to help identify specific concurrency
bugs such as data races, define-use order violations,
and two-stage access bugs. In the final case study, we
present a novel predicate learning procedure that uses
HB-Formulas representing abstract counterexamples to
accelerate counterexample-guided abstraction refinement
(CEGAR). In each iteration of the CEGAR loop, the
procedure refines the abstraction to eliminate multiple
spurious abstract counterexamples drawn from the
HB-Formula.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Bogdanas:2015:KJC,
author = "Denis Bogdanas and Grigore Rosu",
title = "{K-Java}: a Complete Semantics of {Java}",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "445--456",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676982",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents K-Java, a complete executable
formal semantics of Java 1.4. K-Java was extensively
tested with a test suite developed alongside the
project, following the Test Driven Development
methodology. In order to maintain clarity while
handling the great size of Java, the semantics was
split into two separate definitions --- a static
semantics and a dynamic semantics. The output of the
static semantics is a preprocessed Java program, which
is passed as input to the dynamic semantics for
execution. The preprocessed program is a valid Java
program, which uses a subset of the features of Java.
The semantics is applied to model-check multi-threaded
programs. Both the test suite and the static semantics
are generic and ready to be used in other Java-related
projects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Adams:2015:TEH,
author = "Michael D. Adams",
title = "Towards the Essence of Hygiene",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "457--469",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677013",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hygiene is an essential aspect of Scheme's macro
system that prevents unintended variable capture.
However, previous work on hygiene has focused on
algorithmic implementation rather than precise,
mathematical definition of what constitutes hygiene.
This is in stark contrast with lexical scope,
alpha-equivalence and capture-avoiding substitution,
which also deal with preventing unintended variable
capture but have widely applicable and well-understood
mathematical definitions. This paper presents such a
precise, mathematical definition of hygiene. It reviews
various kinds of hygiene violation and presents
examples of how they occur. From these examples, we
develop a practical algorithm for hygienic macro
expansion. We then present algorithm-independent,
mathematical criteria for whether a macro expansion
algorithm is hygienic. This characterization
corresponds closely to existing hygiene algorithms and
sheds light on aspects of hygiene that are usually
overlooked in informal definitions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Brown:2015:SRG,
author = "Matt Brown and Jens Palsberg",
title = "Self-Representation in {Girard}'s {System U}",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "471--484",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676988",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In 1991, Pfenning and Lee studied whether System F
could support a typed self-interpreter. They concluded
that typed self-representation for System F ``seems to
be impossible'', but were able to represent System F in
F$_{ \omega }$. Further, they found that the
representation of F$_{ \omega }$ requires kind
polymorphism, which is outside F$_{ \omega }$. In 2009,
Rendel, Ostermann and Hofer conjectured that the
representation of kind-polymorphic terms would require
another, higher form of polymorphism. Is this a case of
infinite regress? We show that it is not and present a
typed self-representation for Girard's System U, the
first for a \lambda -calculus with decidable type
checking. System U extends System F$_{ \omega }$ with
kind polymorphic terms and types. We show that kind
polymorphic types (i.e. types that depend on kinds) are
sufficient to ``tie the knot'' --- they enable
representations of kind polymorphic terms without
introducing another form of polymorphism. Our
self-representation supports operations that iterate
over a term, each of which can be applied to a
representation of itself. We present three typed
self-applicable operations: a self-interpreter that
recovers a term from its representation, a predicate
that tests the intensional structure of a term, and a
typed continuation-passing-style (CPS) transformation
--- the first typed self-applicable CPS transformation.
Our techniques could have applications from verifiably
type-preserving metaprograms, to growable typed
languages, to more efficient self-interpreters.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Lee:2015:CEE,
author = "Peter Lee",
title = "Coding by Everyone, Every Day",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "485--485",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2682622",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In recent years, advances in machine learning and
related fields have led to significant advances in a
range of user-interface technologies, including audio
processing, speech recognition, and natural language
processing. These advances in turn have enabled
speech-based digital assistants and speech-to-speech
translation systems to become practical to deploy on a
large scale. In essence, machines are becoming capable
of hearing what we are saying. But will they understand
what we want them to do when we talk to them? What are
the prospects for getting useful work done in essence,
by synthesizing programs --- through the act of having
a conversation with a computer? In this lecture, I will
speculate on the central role that programming-language
design and program synthesis may have in this possible
--- and I will argue, likely --- future of computing,
one in which every user writes programs, every day, by
conversing with a computing system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Buneman:2015:DPT,
author = "Peter Buneman",
title = "Databases and Programming: Two Subjects Divided by a
Common Language?",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "487--487",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2682620",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The 1990s saw a hugely productive interaction between
database and programming language research. Ideas about
type systems from programming languages played a
central role in generalizing and adapting relational
database systems to new data models. At the same time
databases provided some of the best concrete examples
of the application of concurrency theory and of the
benefits of high-level optimization in functional
programming languages. One of the driving ambitions
behind this research was the idea that database access
should be properly embedded in programming languages:
one should not have to be bilingual in order to use a
database from a programming language; and that goal has
to some extent been realized. In the past fifteen
years, new data models, both for data storage and for
data exchange, have appeared with depressing regularity
and with each such model, the inevitable query
language. Does programming language research have
anything to contribute to these new languages? Should
we take the time to worry about embedding these
models in conventional languages? Over the same period,
some interesting new connections between databases and
programming languages have emerged, notably in the
areas of scientific databases, annotation and
provenance. Will this provide new opportunities for
cross-fertilization?",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Fioriti:2015:PTS,
author = "Luis Mar{\'\i}a Ferrer Fioriti and Holger Hermanns",
title = "Probabilistic Termination: Soundness, Completeness,
and Compositionality",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "489--501",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677001",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a framework to prove almost sure
termination for probabilistic programs with real-valued
variables. It is based on ranking supermartingales, a
notion analogous to ranking functions on
non-probabilistic programs. The framework is proven
sound and complete for a meaningful class of programs
involving randomization and bounded nondeterminism. We
complement this foundational insight by a practical
proof methodology, based on sound conditions that
enable compositional reasoning and are amenable to a
direct implementation using modern theorem provers.
This is integrated in a small dependent type system, to
overcome the problem that lexicographic ranking
functions fail when combined with randomization. Among
others, this compositional methodology enables the
verification of probabilistic programs outside the
complete class that admits ranking supermartingales.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{He:2015:LWA,
author = "Fei He and Xiaowei Gao and Bow-Yaw Wang and Lijun
Zhang",
title = "Leveraging Weighted Automata in Compositional
Reasoning about Concurrent Probabilistic Systems",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "503--514",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676998",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose the first sound and complete learning-based
compositional verification technique for probabilistic
safety properties on concurrent systems where each
component is a Markov decision process. Different from
previous works, weighted assumptions are introduced to
attain completeness of our framework. Since weighted
assumptions can be implicitly represented by
multi-terminal binary decision diagrams (MTBDDs), we
give an L*-based learning algorithm for MTBDDs to
infer weighted assumptions. Experimental results
suggest promising outlooks for our compositional
technique.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Bonchi:2015:FAS,
author = "Filippo Bonchi and Pawel Sobocinski and Fabio Zanasi",
title = "Full Abstraction for Signal Flow Graphs",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "515--526",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676993",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Network theory uses the string diagrammatic language
of monoidal categories to study graphical structures
formally, eschewing specialised translations into
intermediate formalisms. Recently, there has been a
concerted research focus on developing a network
theoretic approach to signal flow graphs, which are
classical structures in control theory, signal
processing and a cornerstone in the study of feedback.
In this approach, signal flow graphs are given a
relational denotational semantics in terms of formal
power series. Thus far, the operational behaviour of
such signal flow graphs has only been discussed at an
intuitive level. In this paper we equip them with a
structural operational semantics. As is typically the
case, the purely operational picture is too concrete
--- two graphs that are denotationally equal may
exhibit different operational behaviour. We classify
the ways in which this can occur and show that any
graph can be realised --- rewritten, using the
graphical theory, into an executable form where the
operational behaviour and the denotation coincide.
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Hinze:2015:CHM,
author = "Ralf Hinze and Nicolas Wu and Jeremy Gibbons",
title = "Conjugate Hylomorphisms --- Or: The Mother of All
Structured Recursion Schemes",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "527--538",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676989",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The past decades have witnessed an extensive study of
structured recursion schemes. A general scheme is the
hylomorphism, which captures the essence of
divide-and-conquer: a problem is broken into
sub-problems by a coalgebra; sub-problems are solved
recursively; the sub-solutions are combined by an
algebra to form a solution. In this paper we develop a
simple toolbox for assembling recursive coalgebras,
which by definition ensure that their hylo equations
have unique solutions, whatever the algebra. Our main
tool is the conjugate rule, a generic rule parametrized
by an adjunction and a conjugate pair of natural
transformations. We show that many basic adjunctions
induce useful recursion schemes. In fact, almost every
structured recursion scheme seems to arise as an
instance of the conjugate rule. Further, we adapt our
toolbox to the more expressive setting of
parametrically recursive coalgebras, where the original
input is also passed to the algebra. The formal
development is complemented by a series of worked-out
examples in Haskell.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Chatterjee:2015:QIA,
author = "Krishnendu Chatterjee and Andreas Pavlogiannis and
Yaron Velner",
title = "Quantitative Interprocedural Analysis",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "539--551",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676968",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We consider the quantitative analysis problem for
interprocedural control-flow graphs (ICFGs). The input
consists of an ICFG, a positive weight function that
assigns every transition a positive integer-valued
number, and a labelling of the transitions (events) as
good, bad, and neutral events. The weight function
assigns to each transition a numerical value that
represents a measure of how good or bad an event is.
The quantitative analysis problem asks whether there is
a run of the ICFG where the ratio of the sum of the
numerical weights of good events versus the sum of
weights of bad events in the long-run is at least a
given threshold (or equivalently, to compute the
maximal ratio among all valid paths in the ICFG). The
quantitative analysis problem for ICFGs can be solved
in polynomial time, and we present an efficient and
practical algorithm for the problem. We show that
several problems relevant for static program analysis,
such as estimating the worst-case execution time of a
program or the average energy consumption of a mobile
application, can be modeled in our framework. We have
implemented our algorithm as a tool in the Java Soot
framework. We demonstrate the effectiveness of our
approach with two case studies. First, we show that our
framework provides a sound approach (no false
positives) for the analysis of inefficiently-used
containers. Second, we show that our approach can also
be used for static profiling of programs which reasons
about methods that are frequently invoked. Our
experimental results show that our tool scales to
relatively large benchmarks, and discovers relevant and
useful information that can be used to optimize
performance of the programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Bastani:2015:SIU,
author = "Osbert Bastani and Saswat Anand and Alex Aiken",
title = "Specification Inference Using Context-Free Language
Reachability",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "553--566",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676977",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a framework for computing context-free
language reachability properties when parts of the
program are missing. Our framework infers candidate
specifications for missing program pieces that are
needed for verifying a property of interest, and
presents these specifications to a human auditor for
validation. We have implemented this framework for a
taint analysis of Android apps that relies on
specifications for Android library methods. In an
extensive experimental study on 179 apps, our tool
performs verification with only a small number of
queries to a human auditor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Elango:2015:CDA,
author = "Venmugil Elango and Fabrice Rastello and
Louis-No{\"e}l Pouchet and J. Ramanujam and P.
Sadayappan",
title = "On Characterizing the Data Access Complexity of
Programs",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "567--580",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677010",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Technology trends will cause data movement to account
for the majority of energy expenditure and execution
time on emerging computers. Therefore, computational
complexity will no longer be a sufficient metric for
comparing algorithms, and a fundamental
characterization of data access complexity will be
increasingly important. The problem of developing lower
bounds for data access complexity has been modeled
using the formalism of Hong and Kung's red/blue pebble
game for computational directed acyclic graphs (CDAGs).
However, previously developed approaches to lower
bounds analysis for the red/blue pebble game are very
limited in effectiveness when applied to CDAGs of real
programs, with computations composed of multiple
sub-computations with differing DAG structure. We
address this problem by developing an approach for
effectively composing lower bounds based on graph
decomposition. We also develop a static analysis
algorithm to derive the asymptotic data-access lower
bounds of programs, as a function of the problem size
and cache size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Agten:2015:SMV,
author = "Pieter Agten and Bart Jacobs and Frank Piessens",
title = "Sound Modular Verification of {C} Code Executing in an
Unverified Context",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "581--594",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676972",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the past decade, great progress has been made in
the static modular verification of C code by means of
separation logic-based program logics. However, the
runtime guarantees offered by such verification are
relatively limited when the verified modules are part
of a whole program that also contains unverified
modules. In particular, a memory safety error in an
unverified module can corrupt the runtime state,
leading to assertion failures or invalid memory
accesses in the verified modules. This paper develops
runtime checks to be inserted at the boundary between
the verified and the unverified part of a program, to
guarantee that no assertion failures or invalid memory
accesses can occur at runtime in any verified module.
One of the key challenges is enforcing the separation
logic frame rule, which we achieve by checking the
integrity of the footprint of the verified part of the
program on each control flow transition from the
unverified to the verified part. This in turn requires
the presence of some support for module-private memory
at runtime. We formalize our approach and prove
soundness. We implement the necessary runtime checks by
means of a program transformation that translates C
code with separation logic annotations into plain C,
and that relies on a protected module architecture for
providing module-private memory and restricted module
entry points. Benchmarks show the performance impact of
this transformation depends on the choice of boundary
between the verified and unverified parts of the
program, but is below 4\% for real-world
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Gu:2015:DSC,
author = "Ronghui Gu and J{\'e}r{\'e}mie Koenig and Tahina
Ramananandro and Zhong Shao and Xiongnan (Newman) Wu
and Shu-Chun Weng and Haozhong Zhang and Yu Guo",
title = "Deep Specifications and Certified Abstraction Layers",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "595--608",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676975",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern computer systems consist of a multitude of
abstraction layers (e.g., OS kernels, hypervisors,
device drivers, network protocols), each of which
defines an interface that hides the implementation
details of a particular set of functionality. Client
programs built on top of each layer can be understood
solely based on the interface, independent of the layer
implementation. Despite their obvious importance,
abstraction layers have mostly been treated as a system
concept; they have almost never been formally specified
or verified. This makes it difficult to establish
strong correctness properties, and to scale program
verification across multiple layers. In this paper, we
present a novel language-based account of abstraction
layers and show that they correspond to a strong form
of abstraction over a particularly rich class of
specifications which we call deep specifications. Just
as data abstraction in typed functional languages leads
to the important representation independence property,
abstraction over deep specification is characterized by
an important implementation independence property: any
two implementations of the same deep specification must
have contextually equivalent behaviors. We present a
new layer calculus showing how to formally specify,
program, verify, and compose abstraction layers. We
show how to instantiate the layer calculus in realistic
programming languages such as C and assembly, and how
to adapt the CompCert verified compiler to compile
certified C layers such that they can be linked with
assembly layers. Using these new languages and tools,
we have successfully developed multiple certified OS
kernels in the Coq proof assistant, the most realistic
of which consists of 37 abstraction layers, took less
than one person-year to develop, and can boot a version
of Linux as a guest.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Chlipala:2015:NIM,
author = "Adam Chlipala",
title = "From Network Interface to Multithreaded {Web}
Applications: a Case Study in Modular Program
Verification",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "609--622",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677003",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many verifications of realistic software systems are
monolithic, in the sense that they define single global
invariants over complete system state. More modular
proof techniques promise to support reuse of component
proofs and even reduce the effort required to verify
one concrete system, just as modularity simplifies
standard software development. This paper reports on
one case study applying modular proof techniques in the
Coq proof assistant. To our knowledge, it is the first
modular verification certifying a system that combines
infrastructure with an application of interest to end
users. We assume a nonblocking API for managing TCP
networking streams, and on top of that we work our way
up to certifying multithreaded, database-backed Web
applications. Key verified components include a
cooperative threading library and an implementation of
a domain-specific language for XML processing. We have
deployed our case-study system on mobile robots, where
it interfaces with off-the-shelf components for
sensing, actuation, and control.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Crary:2015:CRM,
author = "Karl Crary and Michael J. Sullivan",
title = "A Calculus for Relaxed Memory",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "623--636",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676984",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a new approach to programming multi-core,
relaxed-memory architectures in imperative, portable
programming languages. Our memory model is based on
explicit, programmer-specified requirements for order
of execution and the visibility of writes. The compiler
then realizes those requirements in the most efficient
manner it can. This is in contrast to existing memory
models, which---if they allow programmer control over
synchronization at all---are based on inferring the
execution and visibility consequences of
synchronization operations or annotations in the code.
We formalize our memory model in a core calculus called
RMC\@. Outside of the programmer's specified
requirements, RMC is designed to be strictly more
relaxed than existing architectures. It employs an
aggressively nondeterministic semantics for
expressions, in which actions can be executed in nearly
any order, and a store semantics that generalizes
Sarkar et al.'s and Alglave et al.'s models of the
Power architecture. We establish several results for
RMC, including sequential consistency for two
programming disciplines, and an appropriate notion of
type safety. All our results are formalized in Coq.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
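A hedged sketch of the "explicit requirements" flavor described in the RMC abstract above, in Python and purely illustrative (it is not the RMC calculus): the programmer states the execution-order edges that matter, and a trace is checked only against those; everything left unordered may be reordered freely.

# Sketch of RMC-style explicit ordering: the programmer names the
# required execution-order edges, and a trace is valid only if it
# respects them. The actual calculus is far richer; this only shows
# the "requirements, not inference" flavor. Event names are invented.

required = {("write_data", "write_flag")}      # programmer-specified edge

def trace_ok(trace):
    pos = {e: i for i, e in enumerate(trace)}
    return all(pos[a] < pos[b] for a, b in required)

print(trace_ok(["write_data", "write_flag", "read"]))  # True
print(trace_ok(["write_flag", "write_data", "read"]))  # False: edge violated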
@Article{Jung:2015:IMI,
author = "Ralf Jung and David Swasey and Filip Sieczkowski and
Kasper Svendsen and Aaron Turon and Lars Birkedal and
Derek Dreyer",
title = "{Iris}: Monoids and Invariants as an Orthogonal Basis
for Concurrent Reasoning",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "637--650",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676980",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Iris, a concurrent separation logic with a
simple premise: monoids and invariants are all you
need. Partial commutative monoids enable us to
express---and invariants enable us to
enforce---user-defined *protocols* on shared state,
which are at the conceptual core of most recent program
logics for concurrency. Furthermore, through a novel
extension of the concept of a *view shift*, Iris
supports the encoding of *logically atomic
specifications*, i.e., Hoare-style specs that permit
the client of an operation to treat the operation
essentially as if it were atomic, even if it is not.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
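A minimal sketch of a partial commutative monoid, the structure the Iris abstract names as one half of its basis. Fractional permissions are chosen here as a familiar instance; the Python code is illustrative and not taken from the paper.

# A minimal sketch (not Iris itself) of a partial commutative monoid
# (PCM). Composition is partial: combining resources that exceed the
# whole is undefined (None).

class Fraction:
    """Fractional permission on a shared location: 0 < q <= 1."""
    def __init__(self, q):
        assert 0 < q <= 1
        self.q = q

    def compose(self, other):
        # Partial operation: permissions exceeding 1 are undefined.
        total = self.q + other.q
        return Fraction(total) if total <= 1 else None

half = Fraction(0.5)
combined = half.compose(Fraction(0.5))
print(combined.q)                    # 1.0: full ownership recovered
print(Fraction(0.75).compose(half))  # None: composition undefined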
@Article{Bouajjani:2015:TRC,
author = "Ahmed Bouajjani and Michael Emmi and Constantin Enea
and Jad Hamza",
title = "Tractable Refinement Checking for Concurrent Objects",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "651--662",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677002",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Efficient implementations of concurrent objects such
as semaphores, locks, and atomic collections are
essential to modern computing. Yet programming such
objects is error-prone: in minimizing the
synchronization overhead between concurrent object
invocations, one risks breaking conformance to reference
implementations --- or, in formal terms, violating
observational refinement. Testing this
refinement even within a single execution is
intractable, limiting existing approaches to executions
with very few object invocations. We develop a
polynomial-time (per execution) approximation to
refinement checking. The approximation is parameterized
by an accuracy $k \in N$ representing the degree to which
refinement violations are visible. In principle, more
violations are detectable as k increases, and in the
limit, all are detectable. Our insight for this
approximation arises from foundational properties on
the partial orders characterizing the happens-before
relations between object invocations: they are interval
orders, with a well-defined measure of complexity,
i.e., their length. Approximating the happens-before
relation with a possibly-weaker interval order of
bounded length can be efficiently implemented by
maintaining a bounded number of integer counters. In
practice, we find that refinement violations can be
detected with very small values of k, and that our
approach scales far beyond existing refinement-checking
approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
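The abstract's key insight, weakening a happens-before interval order to one of bounded length, can be pictured with a small sketch. The bucketing scheme below is only an illustration of the role of the parameter k, not the authors' counter-based algorithm, and all names are invented.

# Illustrative sketch: approximate the happens-before relation of
# completed operations, each an interval (start, end), by an order of
# bounded "length" k. Time is cut into k segments; a happens-before b
# only if a's segment strictly precedes b's. Larger k preserves more
# orderings, so more refinement violations become visible.

def segment(t, cuts):
    """Index of t among the cut points."""
    return sum(1 for c in cuts if c < t)

def approx_happens_before(ops, k):
    times = sorted(t for op in ops for t in op)
    lo, hi = times[0], times[-1]
    cuts = [lo + (hi - lo) * i / k for i in range(1, k)]
    hb = set()
    for i, (s1, e1) in enumerate(ops):
        for j, (s2, e2) in enumerate(ops):
            if i != j and segment(e1, cuts) < segment(s2, cuts):
                hb.add((i, j))
    return hb

ops = [(0, 2), (3, 5), (6, 8)]          # three non-overlapping calls
print(approx_happens_before(ops, 2))    # coarse: some orderings lost
print(approx_happens_before(ops, 8))    # finer: recovers all three pairs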
@Article{Padon:2015:DSP,
author = "Oded Padon and Neil Immerman and Aleksandr Karbyshev
and Ori Lahav and Mooly Sagiv and Sharon Shoham",
title = "Decentralizing {SDN} Policies",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "663--676",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676990",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software-defined networking (SDN) is a new paradigm
for operating and managing computer networks. SDN
enables logically-centralized control over network
devices through a ``controller'' --- software that
operates independently of the network hardware. Network
operators can run both in-house and third-party SDN
programs on top of the controller, e.g., to specify
routing and access control policies. In practice,
having the controller handle events limits the network
scalability. Therefore, the feasibility of SDN depends
on the ability to efficiently decentralize network
event-handling by installing forwarding rules on the
switches. However, installing a rule too early or too
late may lead to incorrect behavior, e.g., (1) packets
may be forwarded to the wrong destination or
incorrectly dropped; (2) packets handled by the switch
may hide vital information from the controller, leading
to incorrect forwarding behavior. The second issue is
subtle and sometimes missed even by experienced
programmers. The contributions of this paper are
twofold. First, we formalize the correctness and
optimality requirements for decentralizing network
policies. Second, we identify a useful class of network
policies which permits automatic synthesis of a
controller which performs optimal forwarding rule
installation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
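The second, subtle failure mode mentioned in the abstract above (installed rules hiding packets from the controller) is easy to reproduce in a toy model. The Python sketch below shows a naive MAC-learning controller; all names are illustrative and not the paper's formalism.

# Once a forwarding rule is installed, matching packets bypass the
# controller, which may therefore miss information it still needs.

table = {}          # rules installed on the switch: dst -> out port
learned = {}        # controller state: host -> port where last seen

def packet_in(src, dst, in_port):
    learned[src] = in_port               # controller sees the packet
    if dst in learned:
        table[dst] = learned[dst]        # install a forwarding rule
        return learned[dst]
    return "flood"

def switch(src, dst, in_port):
    if dst in table:                     # rule hit: the controller is
        return table[dst]                # bypassed entirely
    return packet_in(src, dst, in_port)

print(switch("A", "B", 1))   # flood; controller learns A is at port 1
print(switch("B", "A", 2))   # rule for dst A installed; sent to port 1
print(switch("B", "A", 2))   # handled on the switch: the controller
                             # never observes this packet, so it cannot
                             # refresh what it knows about host B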
@Article{Cochran:2015:PBP,
author = "Robert A. Cochran and Loris D'Antoni and Benjamin
Livshits and David Molnar and Margus Veanes",
title = "Program Boosting: Program Synthesis via
Crowd-Sourcing",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "677--688",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2676973",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "In this paper, we investigate an approach to program
synthesis that is based on crowd-sourcing. With the
help of crowd-sourcing, we aim to capture the ``wisdom
of the crowds'' to find good, if not perfect, solutions
to inherently tricky programming tasks, which elude
even expert developers and lack an easy-to-formalize
specification. We propose an approach we call program
boosting, which involves crowd-sourcing imperfect
solutions to a difficult programming problem from
developers and then blending these programs together in
a way that improves their correctness. We implement
this approach in a system called CROWDBOOST and show in
our experiments that interesting and highly non-trivial
tasks such as writing regular expressions for URLs or
email addresses can be effectively crowd-sourced. We
demonstrate that carefully blending the crowd-sourced
results together consistently produces a boost,
yielding results that are better than any of the
starting programs. Our experiments on 465 program pairs
show consistent boosts in accuracy and demonstrate that
program boosting can be performed at a relatively
modest monetary cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
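A toy sketch of the blending idea from the abstract above: score crowd-sourced regexes against labeled examples and check whether a simple combination beats each original. CrowdBoost's actual blending works over automata and is far more sophisticated; the data and patterns below are invented.

# Toy "program boosting" for regexes: try the union of two
# crowd-sourced candidates and keep it if it scores better.
import re

def score(pattern, positives, negatives):
    rx = re.compile(pattern)
    ok = sum(1 for s in positives if rx.fullmatch(s))
    ok += sum(1 for s in negatives if not rx.fullmatch(s))
    return ok / (len(positives) + len(negatives))

positives = ["http://a.com", "https://b.org/x"]
negatives = ["http//broken", "ftp://c.net"]

candidate_a = r"https?://[a-z]+\.[a-z]+"      # misses URLs with paths
candidate_b = r"https?://[a-z.]+/[a-z]+"      # requires a path
blend = "(?:%s)|(?:%s)" % (candidate_a, candidate_b)

for p in (candidate_a, candidate_b, blend):
    print("%-45s %.2f" % (p, score(p, positives, negatives)))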
@Article{Delaware:2015:FDS,
author = "Benjamin Delaware and Cl{\'e}ment Pit-Claudel and
Jason Gross and Adam Chlipala",
title = "{Fiat}: Deductive Synthesis of Abstract Data Types in
a Proof Assistant",
journal = j-SIGPLAN,
volume = "50",
number = "1",
pages = "689--700",
month = jan,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775051.2677006",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Fiat, a library for the Coq proof assistant
supporting refinement of declarative specifications
into efficient functional programs with a high degree
of automation. Each refinement process leaves a proof
trail, checkable by the normal Coq kernel, justifying
its soundness. We focus on the synthesis of abstract
data types that package methods with private data. We
demonstrate the utility of our framework by applying it
to the synthesis of query structures --- abstract data
types with SQL-like query and insert operations. Fiat
includes a library for writing specifications of query
structures in SQL-inspired notation, expressing
operations over relations (tables) in terms of
mathematical sets. This library includes a suite of
tactics for automating the refinement of specifications
into efficient, correct-by-construction OCaml code.
Using these tactics, a programmer can generate such an
implementation completely automatically by only
specifying the equivalent of SQL indexes, data
structures capturing useful views of the abstract data.
Throughout we speculate on the new programming
modularity possibilities enabled by an automated
refinement system with proved-correct rules.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '15 conference proceedings.",
}
@Article{Hanenberg:2015:WDW,
author = "Stefan Hanenberg",
title = "Why do we know so little about programming languages,
and what would have happened if we had known more?",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "1--1",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661102",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming language research in the last decades was
mainly driven by mathematical methods (such as formal
semantics, correctness proofs, type soundness proofs,
etc.) or run-time arguments based on benchmark tests.
This happened despite the frequent discussion over
programming language usability. We have now been
through decade after decade of one language after
another dominating the field, forcing companies to
switch languages and migrate libraries. Now that
JavaScript seems to be the next language to dominate,
people start to ask old questions anew. The first goal
of this talk is to discuss why the application of
empirical methods is (still) relatively rare in PL
research, and to discuss what could be done in
empirical methods to make them a substantial part of PL
research. The second goal is to speculate about the
possible effects that concrete empirical knowledge
could have had on the programming language community.
For example, what would have happened to programming
languages if current knowledge had been available 30
years ago? What if knowledge about programming
languages from the year 2050 were available today?",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
@Article{Neto:2015:SOS,
author = "Lourival Vieira Neto and Roberto Ierusalimschy and Ana
L{\'u}cia de Moura and Marc Balmer",
title = "Scriptable operating systems with {Lua}",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "2--10",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661096",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Extensible operating systems are based on the
idea that an operating system can be adapted to meet user
requirements by allowing user extensions. In a
different scenario, that of application development, a
common paradigm holds that complex systems
should allow users to write scripts to tailor an
application to their needs. In this paper we propose
the concept of a scriptable operating system, which
applies the scripting development paradigm to
extensible operating systems. Scriptable operating
systems hold that an operating system can adequately
provide extensibility by allowing users to script their
kernel. We also present an implementation of a
kernel-scripting environment that allows users to
dynamically extend Linux and NetBSD operating systems
using the scripting language Lua. To evaluate this
environment, we extended both OS kernels to allow users
to script CPU frequency scaling and network packet
filtering using Lua.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
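The general shape of kernel scripting as described above can be sketched compactly. The stand-in below is Python rather than Lua, and the hook names are invented, not the paper's actual API: the kernel exposes extension points, and user scripts register small handlers such as a packet filter.

# Sketch of the kernel-scripting shape: named hook points plus
# user-registered handlers. All names are illustrative.

hooks = {"packet_in": [], "cpufreq_decide": []}

def register(hook, fn):
    hooks[hook].append(fn)

def kernel_packet_in(packet):
    # The "kernel" consults every registered script before accepting.
    return all(fn(packet) for fn in hooks["packet_in"])

# A user "script": drop TCP traffic to port 23 (telnet).
def no_telnet(packet):
    return not (packet["proto"] == "tcp" and packet["dport"] == 23)

register("packet_in", no_telnet)
print(kernel_packet_in({"proto": "tcp", "dport": 80}))   # True (accept)
print(kernel_packet_in({"proto": "tcp", "dport": 23}))   # False (drop)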
@Article{Johnson:2015:AAC,
author = "James Ian Johnson and David {Van Horn}",
title = "Abstracting abstract control",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "11--22",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661098",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The strength of a dynamic language is also its
weakness: run-time flexibility comes at the cost of
compile-time predictability. Many of the hallmarks of
dynamic languages such as closures, continuations,
various forms of reflection, and a lack of static types
make many programmers rejoice, while compiler writers,
tool developers, and verification engineers lament. The
dynamism of these features simply confounds static
reasoning about programs that use them. Consequently,
static analyses for dynamic languages are few, far
between, and seldom sound. The ``abstracting abstract
machines'' (AAM) approach to constructing static
analyses has recently been proposed as a method to
ameliorate the difficulty of designing analyses for
such language features. The approach, so called because
it derives a function for the sound and computable
approximation of program behavior starting from the
abstract machine semantics of a language, provides a
viable approach to dynamic language analysis since all
that is required is a machine description of the
interpreter. The AAM recipe as originally described
produces finite state abstractions: the behavior of a
program is approximated as a finite state machine. Such
a model is inherently imprecise when it comes to
reasoning about the control stack of the interpreter: a
finite state machine cannot faithfully represent a
stack. Recent advances have shown that higher-order
programs can be approximated with pushdown systems.
However, such models, founded in automata theory,
either break down or require significant engineering in
the face of dynamic language features that inspect or
modify the control stack. In this paper, we tackle the
problem of bringing pushdown flow analysis to the
domain of dynamic language features. We revise the
abstracting abstract machines technique to target the
stronger computational model of pushdown systems. In
place of automata theory, we use only abstract machines
and memoization. As case studies, we show the technique
applies to a language with closures, garbage
collection, stack-inspection, and first-class
composable continuations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
@Article{Strickland:2015:CDS,
author = "T. Stephen Strickland and Brianna M. Ren and Jeffrey
S. Foster",
title = "Contracts for domain-specific languages in {Ruby}",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "23--34",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661092",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper concerns object-oriented embedded DSLs,
which are popular in the Ruby community but have
received little attention in the research literature.
Ruby DSLs implement language keywords as implicit
method calls to self; language structure is enforced by
adjusting which object is bound to self in different
scopes. While Ruby DSLs are powerful and elegant, they
suffer from a lack of specification. In this paper, we
introduce contracts for Ruby DSLs, which allow us to
attribute blame appropriately when there are
inconsistencies between an implementation and client.
We formalize Ruby DSL contract checking in DSL, a core
calculus that uses premethods with instance evaluation
to enforce contracts. We then describe RDL, an
implementation of Ruby DSL contracts. Finally, we
present two tools that automatically infer RDL
contracts: TypeInfer infers simple, type-like contracts
based on observed method calls, and DSLInfer infers DSL
keyword scopes and nesting by generating and testing
candidate DSL usages based on initial examples. The
type contracts generated by TypeInfer work well enough,
though they are limited in precision by the small
number of tests, while DSLInfer finds almost all DSL
structure. Our goal is to help users understand a DSL
from example programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
@Article{Disney:2015:SYJ,
author = "Tim Disney and Nathan Faubion and David Herman and
Cormac Flanagan",
title = "Sweeten your {JavaScript}: hygienic macros for {ES5}",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "35--44",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661097",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Lisp and Scheme have demonstrated the power of macros
to enable programmers to evolve and craft languages. In
languages with more complex syntax, macros have had
less success. In part, this has been due to the
difficulty in building expressive hygienic macro
systems for such languages. JavaScript in particular
presents unique challenges for macro systems due to
ambiguities in the lexing stage that force the
JavaScript lexer and parser to be intertwined. In this
paper we present a novel solution to the lexing
ambiguity of JavaScript that enables us to cleanly
separate the JavaScript lexer and parser by recording
enough history during lexing to resolve ambiguities. We
give an algorithm for this solution along with a proof
that it does in fact correctly resolve ambiguities in
the language. Though the algorithm and proof we present
are specific to JavaScript, the general technique can be
applied to other languages with ambiguous grammars.
With lexer and parser separated, we then implement an
expressive hygienic macro system for JavaScript called
sweet.js.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
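The lexing ambiguity named in the abstract above is concrete enough to demonstrate in a few lines. The Python sketch below resolves the common cases of JavaScript's slash ambiguity by recording one token of history; the paper's algorithm handles the full language, this sketch deliberately does not.

# "/" starts a regular-expression literal in some contexts and is the
# division operator in others; a little history disambiguates.

def slash_is_regex(prev_token):
    # After an identifier, number, ")" or "]", "/" is division;
    # after an operator, "(", "," or at the start, it begins a regex.
    if prev_token is None:
        return True
    if prev_token in {")", "]"}:
        return False           # e.g., (a + b) / 2
    if prev_token.isidentifier() or prev_token.isdigit():
        return False           # e.g., x / 2
    return True                # e.g., x = /ab+c/

print(slash_is_regex("x"))     # False: division
print(slash_is_regex("="))     # True: regex literal
print(slash_is_regex(")"))     # False: division (usually; the hard
                               # cases are exactly the paper's point)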
@Article{Vitousek:2015:DEG,
author = "Michael M. Vitousek and Andrew M. Kent and Jeremy G.
Siek and Jim Baker",
title = "Design and evaluation of gradual typing for {Python}",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "45--56",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661101",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Combining static and dynamic typing within the same
language offers clear benefits to programmers. It
provides dynamic typing in situations that require
rapid prototyping, heterogeneous data structures, and
reflection, while supporting static typing when safety,
modularity, and efficiency are primary concerns. Siek
and Taha (2006) introduced an approach to combining
static and dynamic typing in a fine-grained manner
through the notion of type consistency in the static
semantics and run-time casts in the dynamic semantics.
However, many open questions remain regarding the
semantics of gradually typed languages. In this paper
we present Reticulated Python, a system for
experimenting with gradually typed dialects of Python.
The dialects are syntactically identical to Python 3
but give static and dynamic semantics to the type
annotations already present in Python 3. Reticulated
Python consists of a typechecker and a source-to-source
translator from Reticulated Python to Python 3. Using
Reticulated Python, we evaluate a gradual type system
and three approaches to the dynamic semantics of
mutable objects: the traditional semantics based on
Siek and Taha (2007) and Herman et al. (2007) and two
new designs. We evaluate these designs in the context
of several third-party Python programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
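A minimal sketch of the machinery the abstract builds on (Siek and Taha's type consistency plus runtime casts). This illustrates the mechanism only and is not Reticulated Python's implementation; the names are invented.

# A "Dyn" type consistent with everything, and the runtime cast a
# gradual compiler inserts where static checking ends.

class Dyn:
    pass

def consistent(t1, t2):
    """Type consistency: like equality, but Dyn matches anything."""
    return t1 is Dyn or t2 is Dyn or t1 is t2

def cast(value, target):
    """Runtime cast: the dynamic check inserted at a boundary."""
    if target is Dyn or isinstance(value, target):
        return value
    raise TypeError("cast of %r to %s failed" % (value, target.__name__))

# A statically typed function called with a dynamically typed argument:
assert consistent(Dyn, int)         # statically accepted...
print(cast(3, int))                 # ...and checked at run time: 3
try:
    cast("three", int)              # blame falls on the dynamic caller
except TypeError as e:
    print(e)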
@Article{Freudenberg:2015:SMP,
author = "Bert Freudenberg and Dan H. H. Ingalls and Tim
Felgentreff and Tobias Pape and Robert Hirschfeld",
title = "{SqueakJS}: a modern and practical {Smalltalk} that runs
in any browser",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "57--66",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661100",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "We report our experience in implementing SqueakJS, a
bit-compatible implementation of Squeak/Smalltalk
written in pure JavaScript. SqueakJS runs entirely in
the Web browser with a virtual filesystem that can be
directed to a server or client-side storage. Our
implementation is notable for simplicity and
performance gained through adaptation to the host
object memory and deployment leverage gained through
the Lively Web development environment. We present
several novel techniques as well as performance
measurements for the resulting virtual machine. Much of
this experience is potentially relevant to preserving
other dynamic language systems and making them
available in a browser-based environment.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
@Article{Aigner:2015:AJE,
author = "Martin Aigner and Thomas H{\"u}tter and Christoph M.
Kirsch and Alexander Miller and Hannes Payer and Mario
Preishuber",
title = "{ACDC-JS}: explorative benchmarking of {JavaScript}
memory management",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "67--78",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661089",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "We present ACDC-JS, an open-source JavaScript memory
management benchmarking tool. ACDC-JS incorporates a
heap model based on real web applications and may be
configured to expose virtually any relevant performance
characteristics of JavaScript memory management
systems. ACDC-JS is based on ACDC, a benchmarking tool
for C/C++ that models periodic allocation and
deallocation behavior (AC) as well as persistent memory
(DC). We identify important characteristics of
JavaScript mutator behavior and propose a configurable
heap model based on typical distributions of these
characteristics as the foundation for ACDC-JS. We describe
heap analyses of 13 real web applications extending
existing work on JavaScript behavior analysis. Our
experimental results show that ACDC-JS enables
performance benchmarking and debugging of
state-of-the-art JavaScript virtual machines such as V8
and SpiderMonkey by exposing key aspects of their
memory management performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
@Article{Kotthaus:2015:DPS,
author = "Helena Kotthaus and Ingo Korb and Michael Engel and
Peter Marwedel",
title = "Dynamic page sharing optimization for the {R}
language",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "79--90",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661094",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic languages such as R are increasingly used to
process large data sets. Here, the R interpreter
induces a large memory overhead due to wasteful memory
allocation policies. If an application's working set
exceeds the available physical memory, the OS starts to
swap, resulting in slowdowns of several orders of
magnitude. Thus, memory optimizations for R will be
beneficial to many applications. Existing R
optimizations are mostly based on dynamic compilation
or native libraries. Both methods are futile when the
OS starts to page out memory. So far, only a few
data-type- or application-specific memory optimizations
for R exist. To remedy this situation, we present a
low-overhead page sharing approach for R that
significantly reduces the interpreter's memory
overhead. Concentrating on the most rewarding
optimizations avoids the high runtime overhead of
existing generic approaches for memory deduplication or
compression. In addition, by applying knowledge of
interpreter data structures and memory allocation
patterns, our approach is not constrained to specific R
applications and is transparent to the R interpreter.
Our page sharing optimization enables us to reduce the
memory consumption by up to 53.5\% with an average of
18.0\% for a set of real-world R benchmarks with a
runtime overhead of only 5.3\% on average. In cases
where page I/O can be avoided, significant speedups are
achieved.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
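The core mechanism behind the page-sharing optimization described above can be sketched briefly. The paper works inside the R interpreter with knowledge of its allocator; the Python sketch below shows only the content-hash coalescing idea, with invented names.

# Identical pages are detected by content hash and stored once;
# a write to a shared page would trigger a copy (copy-on-write).
import hashlib

store = {}                                  # digest -> page bytes

def share(page):
    d = hashlib.sha256(page).digest()
    return store.setdefault(d, page)        # identical pages coalesce

p1 = share(b"\x00" * 4096)
p2 = share(b"\x00" * 4096)
print(p1 is p2)                             # True: one physical copy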
@Article{Khan:2015:UJW,
author = "Faiz Khan and Vincent Foley-Bourgon and Sujay
Kathrotia and Erick Lavoie and Laurie Hendren",
title = "Using {JavaScript} and {WebCL} for numerical
computations: a comparative study of native and web
technologies",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "91--102",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661090",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "From its modest beginnings as a tool to validate
forms, JavaScript is now an industrial-strength
language used to power online applications such as
spreadsheets, IDEs, image editors and even 3D games.
Since all modern web browsers support JavaScript, it
provides a medium that is both easy to distribute for
developers and easy to access for users. This paper
provides empirical data to answer the question: Is
JavaScript fast enough for numerical computations? By
measuring and comparing the runtime performance of
benchmarks representative of a wide variety of
scientific applications, we show that sequential
JavaScript is within a factor of 2 of native code.
Parallel code using WebCL shows speed improvements of
up to 2.28 over JavaScript for the majority of the
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
@Article{Rhodes:2015:DDO,
author = "Dustin Rhodes and Tim Disney and Cormac Flanagan",
title = "Dynamic detection of object capability violations
through model checking",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "103--112",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661099",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper we present a new tool called DOCaT
(Dynamic Object Capability Tracer), a model checker for
JavaScript that detects capability leaks in an object
capability system. DOCaT includes an editor that
highlights the sections of code that can be potentially
transferred to untrusted third-party code along with a
trace showing how the code could be leaked in an actual
execution. This code highlighting provides a simple way
of visualizing the references untrusted code
potentially has access to and helps programmers to
discover if their code is leaking more capabilities
than required. DOCaT is implemented using a combination
of source code rewriting (using Sweet.js, a JavaScript
macro system), dynamic behavioral intercession
(Proxies, introduced in ES6, the most recent version of
JavaScript), and model checking. Together these methods
are able to locate common ways for untrusted code to
elevate its authority.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
@Article{Steinert:2015:OVS,
author = "Bastian Steinert and Lauritz Thamsen and Tim
Felgentreff and Robert Hirschfeld",
title = "Object versioning to support recovery needs: using
proxies to preserve previous development states in
{Lively}",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "113--124",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661093",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "We present object versioning as a generic approach to
preserve access to previous development and application
states. Version-aware references can manage the
modifications made to the target object and record
versions as desired. Such references can be provided
without modifications to the virtual machine. We used
proxies to implement the proposed concepts and
demonstrate the Lively Kernel running on top of this
object versioning layer. This enables Lively users to
undo the effects of direct manipulation and other
programming actions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
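A version-aware reference of the kind the abstract describes can be sketched without VM support. Lively's implementation uses JavaScript proxies; the Python analog below intercepts attribute writes, snapshots the target, and allows undo. All names are illustrative.

# Sketch of a version-aware reference: snapshot on every write so
# earlier states can be restored.
import copy

class Versioned:
    def __init__(self, target):
        object.__setattr__(self, "_target", target)
        object.__setattr__(self, "_versions", [])

    def __getattr__(self, name):
        return getattr(object.__getattribute__(self, "_target"), name)

    def __setattr__(self, name, value):
        target = object.__getattribute__(self, "_target")
        object.__getattribute__(self, "_versions").append(
            copy.deepcopy(target.__dict__))       # snapshot, then write
        setattr(target, name, value)

    def undo(self):
        target = object.__getattribute__(self, "_target")
        target.__dict__.update(
            object.__getattribute__(self, "_versions").pop())

class Morph:
    def __init__(self):
        self.color = "blue"

m = Versioned(Morph())
m.color = "red"
print(m.color)   # red
m.undo()
print(m.color)   # blue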
@Article{Matsakis:2015:TOJ,
author = "Nicholas D. Matsakis and David Herman and Dmitry
Lomov",
title = "Typed objects in {JavaScript}",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "125--134",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661095",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "JavaScript's typed arrays have proven to be a crucial
API for many JS applications, particularly those
working with large amounts of data or emulating other
languages. Unfortunately, the current typed array API
offers no means of abstraction. Programmers are
supplied with a simple byte buffer that can be viewed
as an array of integers or floats, but nothing more.
This paper presents a generalization of the typed
arrays API entitled typed objects. The typed objects
API is slated for inclusion in the upcoming ES7
standard. The API gives users the ability to define
named types, making typed arrays much easier to work
with. In particular, it is often trivial to replace
uses of existing JavaScript objects with typed objects,
resulting in better memory consumption and more
predictable performance. The advantages of the typed
object specification go beyond convenience, however. By
supporting opacity---that is, the ability to deny
access to the raw bytes of a typed object---the new
typed object specification makes it possible to store
objects as well as scalar data and also enables more
optimization by JIT compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
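The layout-plus-names concept behind typed objects can be mirrored in a few lines. The Python analog below, using the struct module over a flat byte buffer, only mirrors the idea; JavaScript's actual typed objects API differs, and the type name is invented.

# Named, fixed-layout fields over a raw byte buffer, instead of a
# bare array of floats.
import struct

class PointType:
    fmt = struct.Struct("<ff")          # two little-endian float32s

    def __init__(self, buffer, offset=0):
        self.buffer, self.offset = buffer, offset

    def get(self):
        return self.fmt.unpack_from(self.buffer, self.offset)

    def set(self, x, y):
        self.fmt.pack_into(self.buffer, self.offset, x, y)

buf = bytearray(PointType.fmt.size * 2)   # an "array" of two Points
p0 = PointType(buf, 0)
p1 = PointType(buf, PointType.fmt.size)
p0.set(1.0, 2.0)
p1.set(3.0, 4.0)
print(p0.get(), p1.get())   # (1.0, 2.0) (3.0, 4.0)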
@Article{Callau:2015:UTP,
author = "Oscar Calla{\'u} and Romain Robbes and {\'E}ric Tanter
and David R{\"o}thlisberger and Alexandre Bergel",
title = "On the use of type predicates in object-oriented
software: the case of smalltalk",
journal = j-SIGPLAN,
volume = "50",
number = "2",
pages = "135--146",
month = feb,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775052.2661091",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:21 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Object-orientation relies on polymorphism to express
behavioral variants. As opposed to traditional
procedural design, explicit type-based conditionals
should be avoided. This message is conveyed in
introductory material on object orientation, as well as
in object-oriented reengineering patterns. Is this
principle followed in practice? In other words, are
type predicates actually used in object-oriented
software, and if so, to what extent? Answering these
questions will assist practitioners and researchers
by providing information about the state of the
practice, and will inform the active research program of
retrofitting type systems, clarifying whether complex
flow-sensitive typing approaches are necessary. Other
areas, such as refactoring and teaching object
orientation, can also benefit from empirical evidence
on the matter. We report on a study of the use of type
predicates in a large base of over 4 million lines of
Smalltalk code. Our study shows that type predicates
are in fact widely used to do explicit type dispatch,
suggesting that flow-sensitive typing approaches are
necessary for a type system retrofitted for a dynamic
object-oriented language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '14 conference proceedings.",
}
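For readers unfamiliar with the term, a short example pins down what the study counts as a "type predicate". The sketch is Python (isinstance) standing in for Smalltalk's isKindOf:-style tests; the classes are invented.

# Explicit type-based dispatch, the style OO texts discourage in
# favor of polymorphism.

class Circle:
    def __init__(self, r): self.r = r
    def area(self): return 3.14159 * self.r ** 2   # polymorphic style

class Square:
    def __init__(self, s): self.s = s
    def area(self): return self.s ** 2

def area_with_predicates(shape):
    # Explicit dispatch via type predicates: the pattern the study counts.
    if isinstance(shape, Circle):
        return 3.14159 * shape.r ** 2
    return shape.s ** 2

print(Circle(2).area(), area_with_predicates(Circle(2)))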
@Article{Jarvi:2015:SPH,
author = "Jaakko J{\"a}rvi and Gabriel Foust and Magne
Haveraaen",
title = "Specializing planners for hierarchical multi-way
dataflow constraint systems",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "1--10",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658762",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A constraint system consists of variables and a set of
constraints on those variables. To solve a constraint
system is to find a valuation that satisfies all
constraints; or the ``best'' subset of constraints if
not all can simultaneously be satisfied. In a multi-way
dataflow constraint system, solving requires selecting
a set of user-defined functions which, when executed,
will enforce the constraints. The task of selecting
these functions is called planning. The planner has two
kinds of input: the specification of the constraints
and an order of priority for those constraints. The
former typically changes seldom, while the latter
changes frequently, making constraint planning a potential
application for program specialization. This paper
shows how to generate specialized planners for
hierarchical multi-way dataflow constraint systems when
the constraints are known in advance. The specialized
planners are DFAs; they can be an order of magnitude or
more faster than a general purpose planner for the same
system. Our applications for constraint systems are in
user interface programming, where constraint systems
determine how a GUI should react to user
interaction---specialized planners can help to ensure
that GUIs' responses to user interaction are
instantaneous.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
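A simplified rendering of the planning problem described above may help. In the sketch below, a constraint offers several "methods" (sets of variables it would write), planning picks one method per constraint in priority order, and a memo table keyed by priority order plays the role of the table a specialized DFA planner encodes. This is an illustration with invented constraints, not the paper's algorithm.

# Hierarchical multi-way dataflow planning, brutally simplified.

# A constraint is a list of methods; a method is the set of variables
# it writes. Two constraints compete for the variable "result".
constraints = {
    "c1": [{"result"}, {"scale"}],
    "c2": [{"result"}, {"width"}],
}

def plan(priority):
    """Enforce constraints highest-priority first; no variable is
    written by two chosen methods."""
    chosen, written = {}, set()
    for name in priority:
        for method in constraints[name]:
            if not (method & written):
                chosen[name] = method
                written |= method
                break
    return chosen

memo = {}
def specialized_plan(priority):
    key = tuple(priority)
    if key not in memo:              # "specialization": fill the table
        memo[key] = plan(priority)   # once per priority order
    return memo[key]

print(specialized_plan(["c1", "c2"]))   # c1 claims "result"
print(specialized_plan(["c2", "c1"]))   # c2 claims it instead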
@Article{Steindorfer:2015:CSM,
author = "Michael J. Steindorfer and Jurgen J. Vinju",
title = "Code specialization for memory efficient hash tries
(short paper)",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "11--14",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658763",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The hash trie data structure is a common part in
standard collection libraries of JVM programming
languages such as Clojure and Scala. It enables fast
immutable implementations of maps, sets, and vectors,
but it requires considerably more memory than an
equivalent array-based data structure. This hinders the
scalability of functional programs and the further
adoption of this otherwise attractive style of
programming. In this paper we present a product family
of hash tries. We generate Java source code to
specialize them using knowledge of JVM object memory
layout. The number of possible specializations is
exponential. The optimization challenge is thus to find
a minimal set of variants which lead to a maximal loss
in memory footprint on any given data. Using a set of
experiments we measured the distribution of internal
tree node sizes in hash tries. We used the results as
guidance to decide which variants of the family to
generate and which variants should be left to the
generic implementation. A preliminary validating
experiment on the implementation of sets and maps shows
that this technique leads to a median decrease of 55\%
in memory footprint for maps (and 78\% for sets), while
still maintaining comparable performance. Our
combination of data analysis and code specialization
proved to be effective.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
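The specialization idea above has a compact analog in Python: a generic trie node stores children in a heap-allocated list, while a generated variant for a small arity holds children in fixed slots. The paper generates Java with knowledge of JVM object layout; __slots__ merely plays the role of the specialized field layout here.

# Generic node vs. a generated arity-2 specialization.
import sys

class GenericNode:
    def __init__(self, *children):
        self.children = list(children)

class Node2:                      # generated specialization, arity 2
    __slots__ = ("c0", "c1")
    def __init__(self, c0, c1):
        self.c0, self.c1 = c0, c1

g = GenericNode("a", "b")
s = Node2("a", "b")
print(sys.getsizeof(g) + sys.getsizeof(g.__dict__)
      + sys.getsizeof(g.children))    # object + dict + list
print(sys.getsizeof(s))               # no dict, no list: much smaller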
@Article{Malakuti:2015:EGM,
author = "Somayeh Malakuti and Mehmet Aksit",
title = "Emergent gummy modules: modular representation of
emergent behavior",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "15--24",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658764",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emergent behavior is generally defined as the
appearance of complex behavior out of multiplicity of
relatively simple interactions. Nowadays, there are
various kinds of software systems that deal with
detecting the emergence of certain behavior in the
environment, representing it in the software, and
providing means to manipulate the behavior. While a
significant amount of research has been dedicated to
developing algorithms for detecting emergent behavior,
there has been no dedicated attempt to provide suitable
linguistic abstractions to modularize emergent behavior
and its related concerns. This results in
implementations that are complex and hard to maintain.
In this paper, we identify three characteristic
features of emergent behavior, and outline the
shortcomings of current languages to properly program
and modularize emergent behavior. We introduce emergent
gummy modules as dedicated linguistic abstractions,
which facilitate defining the appearance and
disappearance conditions of emergent behavior as well
as its utilization operations as one holistic module.
We explain the implementation of emergent gummy modules
in the GummyJ language, and illustrate that they
improve the modularity of implementations. We represent
the event processing semantics of GummyJ programs in
the UPPAAL model checker and verify their correctness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Gouseti:2015:ELI,
author = "Maria Gouseti and Chiel Peters and Tijs van der
Storm",
title = "Extensible language implementation with object
algebras (short paper)",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "25--28",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658765",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Object Algebras are a recently introduced design
pattern to make the implementation of recursive data
types more extensible. In this short paper we report
our experience in using Object Algebras in building a
realistic domain specific language (DSL) for
questionnaires, called QL. This experience has led to a
simple, yet powerful set of tools for the practical and
flexible implementation of highly extensible
languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Walkingshaw:2015:PEV,
author = "Eric Walkingshaw and Klaus Ostermann",
title = "Projectional editing of variational software",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "29--38",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658766",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Editing the source code of variational software is
complicated by the presence of variation annotations,
such as \#ifdef statements, and by code that is only
included in some configurations. When editing some
configurations and not others, it would be easier to
edit a simplified version of the source code that
includes only the configurations we currently care
about. In this paper, we present a projectional editing
model for variational software. Using our approach, a
programmer can partially configure a variational
program, edit this simplified view of the code, and
then automatically update the original, fully
variational source code. The model is based on an
isolation principle where edits affect only the
variants that are visible in the view. We show that
this principle has several nice properties that are
suggested by related work on bidirectional
transformations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
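The editing model above can be sketched on a tiny choice calculus: variation points Choice(dim, left, right), a "project" that picks one side per configured dimension, and an "update" that writes an edited projection back into only the selected variants (the isolation principle). The sketch configures fully; a real implementation must handle partial configurations and nested edits.

# Projectional editing of variational code, minimally.

class Choice:
    def __init__(self, dim, left, right):
        self.dim, self.left, self.right = dim, left, right

def project(tree, config):
    if isinstance(tree, Choice):
        side = tree.left if config[tree.dim] else tree.right
        return project(side, config)
    return tree

def update(tree, config, edited):
    if isinstance(tree, Choice):
        if config[tree.dim]:
            return Choice(tree.dim,
                          update(tree.left, config, edited),
                          tree.right)          # other variant untouched
        return Choice(tree.dim, tree.left,
                      update(tree.right, config, edited))
    return edited

source = Choice("DEBUG", "log(x); work(x)", "work(x)")
view = project(source, {"DEBUG": True})
source = update(source, {"DEBUG": True}, view.replace("log", "trace"))
print(project(source, {"DEBUG": True}))    # trace(x); work(x)
print(project(source, {"DEBUG": False}))   # work(x), unaffected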
@Article{Ruprecht:2015:AFS,
author = "Andreas Ruprecht and Bernhard Heinloth and Daniel
Lohmann",
title = "Automatic feature selection in large-scale
system-software product lines",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "39--48",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658767",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "System software can typically be configured at compile
time via a comfortable feature-based interface to
tailor its functionality towards a specific use case.
However, with the growing number of features, this
tailoring process becomes increasingly difficult: As a
prominent example, the Linux kernel in v3.14 provides
nearly 14 000 configuration options to choose from.
Even developers of embedded systems refrain from trying
to build a minimized distinctive kernel configuration
for their device --- and thereby waste memory and money
for unneeded functionality. In this paper, we present
an approach for the automatic use-case specific
tailoring of system software for special-purpose
embedded systems. We evaluate the effectiveness of our
approach on the example of Linux by generating tailored
kernels for well-known applications of the Raspberry Pi
and a Google Nexus 4 smartphone. Compared to the
original configurations, our approach leads to memory
savings of 15--70 percent and requires only very little
manual intervention.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Ma:2015:ETS,
author = "Lei Ma and Cyrille Artho and Cheng Zhang and Hiroyuki
Sato",
title = "Efficient testing of software product lines via
centralization (short paper)",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "49--52",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658768",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software product line~(SPL) engineering manages
families of software products that share common
features. However, cost-effective test case generation
for an SPL is challenging. Applying existing test case
generation techniques to each product variant
separately may test common code in a redundant way.
Moreover, it is difficult to share the test results
among multiple product variants. In this paper, we
propose the use of centralization, which combines
multiple product variants from the same SPL and
generates test cases for the entire system. By taking
into account all variants, our technique generally
avoids generating redundant test cases for common
software components. Our case study on three SPLs shows
that compared with testing each variant independently,
our technique is more efficient and achieves higher
test coverage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Smeltzer:2015:TAD,
author = "Karl Smeltzer and Martin Erwig and Ronald Metoyer",
title = "A transformational approach to data visualization",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "53--62",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658769",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Information visualization construction tools generally
tend to fall in one of two disparate categories. Either
they offer simple but inflexible visualization
templates, or else they offer low-level graphical
primitives which need to be assembled manually. Those
that do offer flexible, domain-specific abstractions
rarely focus on incrementally building and transforming
visualizations, which could reduce limitations on the
style of workflows supported. We present a
Haskell-embedded DSL for data visualization that is
designed to provide such abstractions and
transformations. This DSL achieves additional
expressiveness and flexibility through common
functional programming idioms and the Haskell type
class hierarchy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Shioda:2015:LLD,
author = "Masato Shioda and Hideya Iwasaki and Shigeyuki Sato",
title = "{LibDSL}: a library for developing embedded domain
specific languages in {D} via template
metaprogramming",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "63--72",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658770",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a library called LibDSL that helps
the implementer of an embedded domain specific language
(EDSL) effectively develop it in the D language. The LibDSL
library accepts as input some kinds of
``specifications'' of the EDSL that the implementer is
going to develop and a D program within which an EDSL
source program written by the user is embedded. It
produces the front-end code of an LALR parser for the
EDSL program and back-end code of the execution engine.
LibDSL is able to produce two kinds of execution
engines, namely compiler-based and interpreter-based
engines, either of which the user can properly choose
depending on whether an EDSL program is known at
compile time or not. We have implemented the LibDSL
system by using template metaprogramming and other
advanced facilities such as compile-time function
execution of the D language. EDSL programs developed by
means of LibDSL integrate nicely with the
host language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Jovanovic:2015:YYC,
author = "Vojin Jovanovic and Amir Shaikhha and Sandro Stucki
and Vladimir Nikolaev and Christoph Koch and Martin
Odersky",
title = "{Yin-Yang}: concealing the deep embedding of {DSLs}",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "73--82",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658771",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Deeply embedded domain-specific languages (EDSLs)
intrinsically compromise programmer experience for
improved program performance. Shallow EDSLs complement
them by trading program performance for good programmer
experience. We present Yin-Yang, a framework for DSL
embedding that uses Scala macros to reliably translate
shallow EDSL programs to the corresponding deep EDSL
programs. The translation allows program prototyping
and development in the user-friendly shallow embedding,
while the corresponding deep embedding is used where
performance is important. The reliability of the
translation completely conceals the deep embedding
from the user. For the DSL author, Yin-Yang
automatically generates the deep DSL embeddings from
their shallow counterparts by reusing the core
translation. This obviates the need for code
duplication and leads to reliability by construction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
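The shallow/deep duality that Yin-Yang hides can be demonstrated compactly: the same surface program either evaluates directly (shallow) or, run against staged values, builds an IR for later optimization (deep). Yin-Yang performs the translation with Scala macros; the Python sketch below fakes it with operator overloading, and all names are invented.

# One surface program, two embeddings.

class Sym:                       # deep embedding: build an AST
    def __init__(self, name): self.name = name
    def __mul__(self, other): return Op("*", self, other)
    def __add__(self, other): return Op("+", self, other)

class Op(Sym):
    def __init__(self, op, l, r):
        self.op, self.l, self.r = op, l, r
    def show(self):
        def s(e):
            return e.show() if isinstance(e, Op) else str(e.name)
        return "(%s %s %s)" % (s(self.l), self.op, s(self.r))

def power3(x):                   # the one shallow program the user writes
    return x * x * x

print(power3(2))                 # shallow: runs now -> 8
print(power3(Sym("x")).show())   # deep: an IR -> ((x * x) * x)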
@Article{Hess:2015:ALF,
author = "Benjamin Hess and Thomas R. Gross and Markus
P{\"u}schel",
title = "Automatic locality-friendly interface extension of
numerical functions",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "83--92",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658772",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Raising the level of abstraction is a key concern of
software engineering, and libraries (either used
directly or as a target of a program generation system)
are a successful technique to raise programmer
productivity and to improve software quality.
Unfortunately, successful libraries may contain
functions that are not general enough. For example,
many numeric performance libraries contain functions
that work on one- or higher-dimensional arrays. A
problem arises if a program wants to invoke such a
function on a non-contiguous subarray (e.g., in C the
column of a matrix or a subarray of an image). If the
library developer did not foresee this scenario, the
client program must include explicit copy steps before
and after the library function call, incurring a
possibly high performance penalty. A better solution
would be an enhanced library function that allows for
the desired access pattern. Exposing the access pattern
allows the compiler to optimize for the intended usage
scenario(s). As we do not want the library developer to
generate all interesting versions manually, we present
a tool that takes a library function written in C and
generates such a customized function for typical
accesses. We describe the approach, discuss
limitations, and report on the performance. As example
access patterns we consider those most common in
numerical applications: striding and block striding,
general permutations, as well as scaling. We evaluate
the tool on various library functions including
filters, scans, reductions, sorting, FFTs, and linear
algebra operations. The automatically generated custom
version is in most cases significantly faster than
using individual steps, offering speed-ups that are
typically in the range of 1.2--1.8x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Kamin:2015:ORS,
author = "Sam Kamin and Mar{\'\i}a Jes{\'u}s Garzar{\'a}n and
Baris Aktemur and Danqing Xu and Buse Yilmaz and
Zhongbo Chen",
title = "Optimization by runtime specialization for sparse
matrix--vector multiplication",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "93--102",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658773",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Runtime specialization optimizes programs based on
partial information available only at run time. It is
applicable when some input data is used repeatedly
while other input data varies. This technique has the
potential of generating highly efficient codes. In this
paper, we explore the potential for obtaining speedups
for sparse matrix-dense vector multiplication using
runtime specialization, in the case where a single
matrix is to be multiplied by many vectors. We
experiment with five methods involving runtime
specialization, comparing them to methods that do not
(including Intel's MKL library). For this work, our
focus is the evaluation of the speedups that can be
obtained with runtime specialization without
considering the overheads of the code generation. Our
experiments use 23 matrices from the Matrix Market and
Florida collections, and run on five different
machines. In 94 of those 115 cases, the specialized
code runs faster than any version without
specialization. If we only use specialization, the
average speedup with respect to Intel's MKL library
ranges from 1.44x to 1.77x, depending on the machine.
We have also found that the best method depends on the
matrix and machine; no method is best for all matrices
and machines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Danilewski:2015:STD,
author = "Piotr Danilewski and Marcel K{\"o}ster and Roland
Lei{\ss}a and Richard Membarth and Philipp Slusallek",
title = "Specialization through dynamic staging",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "103--112",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658774",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Partial evaluation allows for specialization of
program fragments. This can be realized by staging,
where one fragment is executed earlier than its
surrounding code. However, taking advantage of these
capabilities is often a cumbersome endeavor. In this
paper, we present a new metaprogramming concept using
staging parameters that are first-class citizens
and define the order of execution of the
program. Staging parameters can be used to define
MetaML-like quotations, but can also allow stages to be
created and resolved dynamically. The programmer can
write generic, polyvariant code which can be reused in
the context of different stages. We demonstrate how our
approach can be used to define and apply
domain-specific optimizations. Our implementation of
the proposed metaprogramming concept generates code
which is on a par with templated C++ code in terms of
execution time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Asai:2015:CRL,
author = "Kenichi Asai",
title = "Compiling a reflective language using {MetaOCaml}",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "113--122",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658775",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A reflective language makes the language semantics
open to user programs and allows them to access,
extend, and modify it from within the same language
framework. Because of its high flexibility and
expressiveness, it can be an ideal platform for
programming language research as well as practical
applications in dynamic environments. However,
efficient implementation of a reflective language is
extremely difficult. In a setting where the
language semantics can change, a partial evaluator is
required for compilation. This paper reports on the
experience of using MetaOCaml as a compiler for a
reflective language. With staging annotations,
MetaOCaml achieves the same effect as using a partial
evaluator. Unlike the standard partial evaluator, the
run mechanism of MetaOCaml enables us to use the
specialized (compiled) code in the current runtime
environment. On the other hand, the lack of a
binding-time analysis in MetaOCaml prohibits us from
compiling a user program under modified compiled
semantics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Humer:2015:DSL,
author = "Christian Humer and Christian Wimmer and Christian
Wirth and Andreas W{\"o}{\ss} and Thomas
W{\"u}rthinger",
title = "A domain-specific language for building
self-optimizing {AST} interpreters",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "123--132",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658776",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Self-optimizing AST interpreters dynamically adapt to
the provided input for faster execution. This
adaptation includes initial tests of the input, changes
to AST nodes, and insertion of guards that ensure
assumptions still hold. Such specialization and
speculation is essential for the performance of dynamic
programming languages such as JavaScript. In
traditional procedural and object-oriented programming
languages, it can be tedious to write self-optimizing AST
interpreters, as those languages fail to provide
constructs that would specifically support that. This
paper introduces a declarative domain-specific language
(DSL) that greatly simplifies writing self-optimizing
AST interpreters. The DSL supports specialization of
operations based on types of the input and other
properties. It can then use these specializations
directly or chain them to represent the operation with
the minimum amount of code possible. The DSL
significantly reduces the complexity of expressing
specializations for those interpreters. We use it in
our high-performance implementation of JavaScript,
where 274 language operations have an average of about
4 and a maximum of 190 specializations. In addition,
the DSL is used in implementations of Ruby, Python, R,
and Smalltalk.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Hill:2015:POO,
author = "James H. Hill and Dennis C. Feiock",
title = "{Pin++}: an object-oriented framework for writing
{Pintools}",
journal = j-SIGPLAN,
volume = "50",
number = "3",
pages = "133--141",
month = mar,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775053.2658777",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:23 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a framework named Pin++. Pin++ is
an object-oriented framework that uses template
metaprogramming to implement Pintools, which are
analysis tools for the dynamic binary instrumentation
tool named Pin. The goal of Pin++ is to simplify
programming a Pintool and promote reuse of its
components across different Pintools. Our results show
that Pintools implemented using Pin++ can achieve a 54\%
reduction in complexity, increased modularity, and
up to a 60\% reduction in instrumentation overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '14 conference proceedings.",
}
@Article{Ozturk:2015:ASC,
author = "Ozcan Ozturk",
title = "Architectural Support for Cyber-Physical Systems",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "1--1",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694375",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cyber-physical systems are integrations of
computation, communication networks, and physical
dynamics. Although time plays a central role in the
physical world, all widely used software abstractions
lack temporal semantics. The notion of correct
execution of a program written in every widely-used
programming language today does not depend on the
temporal behavior of the program. But temporal behavior
matters in almost all systems, and most particularly in
cyber-physical systems. In this talk, I will argue that
time can and must become part of the semantics of
programs for a large class of applications. To
illustrate that this is both practical and useful, we
will describe a recent effort at Berkeley in the design
and implementation of timing-centric software systems.
Specifically, I will describe PRET machines, which
redefine the instruction-set architecture (ISA) of a
microprocessor to embrace temporal semantics. Such
machines can be used in high-confidence and
safety-critical systems, in energy-constrained systems,
in mixed-criticality systems, and as a Real-Time Unit
(RTU) that cooperates with a general-purpose processor
to provide real-time services, in a manner similar to
how a GPU provides graphics services.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Zhang:2015:MRH,
author = "Yiying Zhang and Jian Yang and Amirsaman Memaripour
and Steven Swanson",
title = "{Mojim}: a Reliable and Highly-Available Non-Volatile
Memory System",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "3--18",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694370",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Next-generation non-volatile memories (NVMs) promise
DRAM-like performance, persistence, and high density.
They can attach directly to processors to form
non-volatile main memory (NVMM) and offer the
opportunity to build very low-latency storage systems.
These high-performance storage systems would be
especially useful in large-scale data center
environments where reliability and availability are
critical. However, providing reliability and
availability to NVMM is challenging, since the latency
of data replication can overwhelm the low latency that
NVMM should provide. We propose Mojim, a system that
provides the reliability and availability that
large-scale storage systems require, while preserving
the performance of NVMM. Mojim achieves these goals by
using a two-tier architecture in which the primary tier
contains a mirrored pair of nodes and the secondary
tier contains one or more secondary backup nodes with
weakly consistent copies of data. Mojim uses
highly-optimized replication protocols, software, and
networking stacks to minimize replication costs and
expose as much of NVMM's performance as possible. We
evaluate Mojim using raw DRAM as a proxy for NVMM and
using an industrial NVMM emulation system. We find that
Mojim provides replicated NVMM with similar or even
better performance than un-replicated NVMM (reducing
latency by 27\% to 63\% and delivering 0.4 to
2.7X the throughput). We demonstrate that replacing
MongoDB's built-in replication system with Mojim
improves MongoDB's performance by 3.4 to 4X.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Wang:2015:SPC,
author = "Rujia Wang and Lei Jiang and Youtao Zhang and Jun
Yang",
title = "{SD-PCM}: Constructing Reliable Super Dense Phase
Change Memory under Write Disturbance",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "19--31",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694352",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Phase Change Memory (PCM) has better scalability and
smaller cell size compared to DRAM. However, further
scaling the PCM cell in the deep sub-micron regime
results in significant thermal-based write disturbance
(WD). Naively allocating large inter-cell space increases
cell size from the ideal 4F$^2$ to 12F$^2$. While a recent
work mitigates WD along word-lines through disturbance
resilient data encoding, it is ineffective for WD along
bit-lines, which is more severe due to widely adopted $
\mu $Trench structure in constructing PCM cell arrays.
Without mitigating WD along bit-lines, a PCM cell still
has 8F$^2$, which is 100\% larger than the ideal. In this
paper, we propose SD-PCM for achieving reliable write
operations in super dense PCM. In particular, we focus
on mitigating WD along bit-lines such that we can
construct super dense PCM chips with 4F$^2$ cell size,
i.e., the minimal for diode-switch based PCM. Based on
simple verification-n-correction (VnC), we propose
LazyCorrection and PreRead to effectively reduce VnC
overhead and minimize cascading verification during
write. We further propose (n:m)-Alloc for achieving
good tradeoff between VnC overhead minimization and
memory capacity loss. Our experimental results show
that, compared to a WD-free low-density PCM, SD-PCM
achieves 80\% capacity improvement in cell arrays while
incurring around 0--10\% performance degradation when
using different (n:m) allocators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Young:2015:DWE,
author = "Vinson Young and Prashant J. Nair and Moinuddin K.
Qureshi",
title = "{DEUCE}: Write-Efficient Encryption for Non-Volatile
Memories",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "33--44",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694387",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Phase Change Memory (PCM) is an emerging Non Volatile
Memory (NVM) technology that has the potential to
provide scalable high-density memory systems. While the
non-volatility of PCM is a desirable property in order
to save leakage power, it also has the undesirable
effect of making PCM main memories susceptible to newer
modes of security vulnerabilities, for example,
accessibility to sensitive data if a PCM DIMM gets
stolen. PCM memories can be made secure by encrypting
the data. Unfortunately, such encryption comes with a
significant overhead in terms of bits written to PCM
memory, causing half of the bits in the line to change
on every write, even if the actual number of bits being
written to memory is small. Our studies show that a
typical writeback modifies, on average, only 12\% of
the bits in the cacheline. Thus, encryption causes
almost a 4x increase in the number of bits written to
PCM memories. Such extraneous bit writes cause a
significant increase in write power, a reduction in write
endurance, and a reduction in write bandwidth. To provide
the benefit of secure memory in a write-efficient
manner, this paper proposes Dual Counter Encryption
(DEUCE). DEUCE is based on the observation that a
typical writeback only changes a few words, so DEUCE
reencrypts only the words that have changed. We show
that DEUCE reduces the number of modified bits per
writeback for a secure memory from 50\% to 24\%, which
improves performance by 27\% and increases lifetime by
2x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Morrison:2015:TBT,
author = "Adam Morrison and Yehuda Afek",
title = "Temporally Bounding {TSO} for Fence-Free Asymmetric
Synchronization",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "45--58",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694374",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces a temporally bounded total store
ordering (TBTSO) memory model, and shows that it
enables nonblocking fence-free solutions to asymmetric
synchronization problems, such as those arising in
memory reclamation and biased locking. TBTSO
strengthens the TSO memory model by bounding the time
it takes a store to drain from the store buffer into
memory. This bound enables devising fence-free
algorithms for asymmetric problems, which require a
performance-critical fast path to synchronize with an
infrequently executed slow path. We demonstrate this by
constructing (1) a fence-free version of the hazard
pointers memory reclamation scheme, and (2) a
fence-free biased lock algorithm which is compatible
with unmanaged environments as it does not rely on safe
points or similar mechanisms. We further argue that
TBTSO can be implemented in hardware with modest
modifications to existing TSO architectures. However,
our design makes assumptions about proprietary
implementation details of commercial hardware; it thus
best serves as a starting point for a discussion on the
feasibility of hardware TBTSO implementation. We also
show how minimal OS support enables the adaptation of
TBTSO algorithms to x86 systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Matveev:2015:RHN,
author = "Alexander Matveev and Nir Shavit",
title = "Reduced Hardware {NOrec}: a Safe and Scalable Hybrid
Transactional Memory",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "59--71",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694393",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Because of hardware TM limitations, software fallbacks
are the only way to make TM algorithms guarantee
progress. Nevertheless, all known software fallbacks to
date, from simple locks to sophisticated versions of
the NOrec Hybrid TM algorithm, have either limited
scalability or weakened semantics. We propose a novel
reduced-hardware (RH) version of the NOrec HyTM
algorithm. Instead of an all-software slow path, in our
RH NOrec the slow path is a ``mix'' of hardware and
software: one short hardware transaction executes a
maximal amount of initial reads in the hardware, and
the second executes all of the writes. This novel
combination of the RH approach and the NOrec algorithm
delivers the first Hybrid TM that scales while fully
preserving the hardware's original semantics of opacity
and privatization. Our GCC implementation of RH NOrec
is promising in that it shows improved performance
relative to all prior methods, at the concurrency
levels we could test today.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Orr:2015:SUR,
author = "Marc S. Orr and Shuai Che and Ayse Yilmazer and
Bradford M. Beckmann and Mark D. Hill and David A.
Wood",
title = "Synchronization Using Remote-Scope Promotion",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "73--86",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694350",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Heterogeneous system architecture (HSA) and OpenCL
define scoped synchronization to facilitate low
overhead communication across a subset of threads.
Scoped synchronization works well for static sharing
patterns, where consumer threads are known a priori. It
works poorly for dynamic sharing patterns (e.g., work
stealing) where programmers cannot use a faster small
scope due to the rare possibility that the work is
stolen by a thread in a distant slower scope. This puts
programmers in a conundrum: optimize the common case by
synchronizing at a faster small scope or use work
stealing at a slower large scope. In this paper, we
propose to extend scoped synchronization with
remote-scope promotion. This allows the most frequent
sharers to synchronize through a small scope.
Infrequent sharers synchronize by promoting that remote
small scope to a larger shared scope. Synchronization
using remote-scope promotion provides performance
robustness for dynamic workloads, where the benefits
provided by scoped synchronization and work stealing
are hard to anticipate. Compared to a na{\"\i}ve
baseline, static scoped synchronization alone achieves
a 1.07x speedup on average and dynamic work stealing
alone achieves a 1.18x speedup on average. In contrast,
synchronization using remote-scope promotion achieves a
robust 1.25x speedup on average, across a diverse set
of graph benchmarks and inputs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Liu:2015:GHS,
author = "Chang Liu and Austin Harris and Martin Maas and
Michael Hicks and Mohit Tiwari and Elaine Shi",
title = "{GhostRider}: a Hardware-Software System for Memory
Trace Oblivious Computation",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "87--101",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694385",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a new, co-designed compiler and
architecture called GhostRider for supporting privacy
preserving computation in the cloud. GhostRider ensures
all programs satisfy a property called memory-trace
obliviousness (MTO): Even an adversary that observes
memory, bus traffic, and access times while the program
executes can learn nothing about the program's
sensitive inputs and outputs. One way to achieve MTO is
to employ Oblivious RAM (ORAM), allocating all code and
data in a single ORAM bank, and to also disable caches
or fix the rate of memory traffic. This baseline
approach can be inefficient, and so GhostRider's
compiler uses a program analysis to do better,
allocating data to non-oblivious, encrypted RAM (ERAM)
and employing a scratchpad when doing so will not
compromise MTO. The compiler can also allocate to
multiple ORAM banks, which sometimes significantly
reduces access times. We have formalized our approach
and proved it enjoys MTO. Our FPGA-based hardware
prototype and simulation results show that GhostRider
significantly outperforms the baseline strategy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Fletcher:2015:FON,
author = "Christopher W. Fletcher and Ling Ren and Albert Kwon
and Marten van Dijk and Srinivas Devadas",
title = "Freecursive {ORAM}: [Nearly] Free Recursion and
Integrity Verification for Position-based Oblivious
{RAM}",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "103--116",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694353",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Oblivious RAM (ORAM) is a cryptographic primitive that
hides memory access patterns as seen by untrusted
storage. Recently, ORAM has been architected into
secure processors. A big challenge for hardware ORAM
schemes is how to efficiently manage the Position Map
(PosMap), a central component in modern ORAM
algorithms. Implemented naively, the PosMap causes ORAM
to be fundamentally unscalable in terms of on-chip
area. On the other hand, a technique called Recursive
ORAM fixes the area problem yet significantly increases
ORAM's performance overhead. To address this challenge,
we propose three new mechanisms. We propose a new ORAM
structure called the PosMap Lookaside Buffer (PLB) and
PosMap compression techniques to reduce the performance
overhead from Recursive ORAM empirically (the latter
also improves the construction asymptotically). Through
simulation, we show that these techniques reduce the
memory bandwidth overhead needed to support recursion
by 95\%, reduce overall ORAM bandwidth by 37\% and
improve overall SPEC benchmark performance by 1.27x. We
then show how our PosMap compression techniques further
facilitate an extremely efficient integrity
verification scheme for ORAM which we call PosMap MAC
(PMMAC). For a practical parameterization, PMMAC
reduces the amount of hashing needed for integrity
checking by $ \geq 68 \times $ relative to prior
schemes and introduces only 7\% performance overhead.
We prototype our mechanisms in hardware and report area
and clock frequency for a complete ORAM design
post-synthesis and post-layout using an ASIC flow in a
32~nm commercial process. With 2 DRAM channels, the
design post-layout runs at 1~GHz and has a total area
of 0.47~mm$^2$. Depending on PLB-specific parameters, the
PLB accounts for 10\% to 26\% area. PMMAC costs 12\% of
total design area. Our work is the first to prototype
Recursive ORAM or ORAM with any integrity scheme in
hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Chisnall:2015:BPA,
author = "David Chisnall and Colin Rothwell and Robert N. M.
Watson and Jonathan Woodruff and Munraj Vadera and
Simon W. Moore and Michael Roe and Brooks Davis and
Peter G. Neumann",
title = "Beyond the {PDP-11}: Architectural Support for a
Memory-Safe {C} Abstract Machine",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "117--130",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694367",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a new memory-safe interpretation of the C
abstract machine that provides stronger protection to
benefit security and debugging. Despite ambiguities in
the specification intended to provide implementation
flexibility, contemporary implementations of C have
converged on a memory model similar to the PDP-11, the
original target for C. This model lacks support for
memory safety despite well-documented impacts on
security and reliability. Attempts to change this model
are often hampered by assumptions embedded in a large
body of existing C code, dating back to the memory
model exposed by the original C compiler for the
PDP-11. Our experience with attempting to implement a
memory-safe variant of C on the CHERI experimental
microprocessor led us to identify a number of
problematic idioms. We describe these as well as their
interaction with existing memory safety schemes and the
assumptions that they make beyond the requirements of
the C specification. Finally, we refine the CHERI ISA
and abstract model for C, by combining elements of the
CHERI capability model and fat pointers, and present a
softcore CPU that implements a C abstract machine that
can run legacy C code with strong memory protection
guarantees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Ma:2015:SDS,
author = "Jiuyue Ma and Xiufeng Sui and Ninghui Sun and Yupeng
Li and Zihao Yu and Bowen Huang and Tianni Xu and
Zhicheng Yao and Yun Chen and Haibin Wang and Lixin
Zhang and Yungang Bao",
title = "Supporting Differentiated Services in Computers via
Programmable Architecture for Resourcing-on-Demand
{(PARD)}",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "131--143",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694382",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "This paper presents PARD, a programmable architecture
for resourcing-on-demand that provides a new
programming interface to convey an application's
high-level information like quality-of-service
requirements to the hardware. PARD enables new
functionalities like fully hardware-supported
virtualization and differentiated services in
computers. PARD is inspired by the observation that a
computer is inherently a network in which hardware
components communicate via packets (e.g., over the NoC
or PCIe). We apply principles of software-defined
networking to this intra-computer network and address
three major challenges. First, to deal with the
semantic gap between high-level applications and
underlying hardware packets, PARD attaches a high-level
semantic tag (e.g., a virtual machine or thread ID) to
each memory-access, I/O, or interrupt packet. Second,
to make hardware components more manageable, PARD
implements programmable control planes that can be
integrated into various shared resources (e.g., cache,
DRAM, and I/O devices) and can differentially process
packets according to tag-based rules. Third, to
facilitate programming, PARD abstracts all control
planes as a device file tree to provide a uniform
programming interface via which users create and apply
tag-based rules. Full-system simulation results show
that by co-locating latency-critical memcached
applications with other workloads PARD can improve a
four-core computer's CPU utilization by up to a factor
of four without significantly increasing tail latency.
FPGA emulation based on a preliminary RTL
implementation demonstrates that the cache control
plane introduces no extra latency and that the memory
control plane can reduce queueing delay for
high-priority memory-access requests by up to a factor
of 5.6.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Omote:2015:IAE,
author = "Yushi Omote and Takahiro Shinagawa and Kazuhiko Kato",
title = "Improving Agility and Elasticity in Bare-metal
Clouds",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "145--159",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694349",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Bare-metal clouds are an emerging
infrastructure-as-a-service (IaaS) that leases physical
machines (bare-metal instances) rather than virtual
machines, allowing resource-intensive applications to
have exclusive access to physical hardware.
Unfortunately, bare-metal instances require
time-consuming or OS-specific tasks for deployment due
to the lack of virtualization layers, thereby
sacrificing several beneficial features of traditional
IaaS clouds such as agility, elasticity, and OS
transparency. We present BMcast, an OS deployment
system with a special-purpose de-virtualizable virtual
machine monitor (VMM) that supports quick and
OS-transparent startup of bare-metal instances. BMcast
performs streaming OS deployment while allowing direct
access to physical hardware from the guest OS, and then
disappears after completing the deployment. Quick
startup of instances improves agility and elasticity
significantly, and OS transparency greatly simplifies
management tasks for cloud customers. Experimental
results have confirmed that BMcast initiated a
bare-metal instance 8.6 times faster than image
copying, and database performance on BMcast during
streaming OS deployment was comparable to that on a
state-of-the-art VMM without performing deployment.
BMcast incurred zero overhead after
de-virtualization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Haque:2015:FMI,
author = "Md E. Haque and Yong hun Eom and Yuxiong He and Sameh
Elnikety and Ricardo Bianchini and Kathryn S.
McKinley",
title = "Few-to-Many: Incremental Parallelism for Reducing Tail
Latency in Interactive Services",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "161--175",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694384",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Interactive services, such as Web search,
recommendations, games, and finance, must respond
quickly to satisfy customers. Achieving this goal
requires optimizing tail (e.g., 99th+ percentile)
latency. Although every server is multicore,
parallelizing individual requests to reduce tail
latency is challenging because (1) service demand is
unknown when requests arrive; (2) blindly parallelizing
all requests quickly oversubscribes hardware resources;
and (3) parallelizing the numerous short requests will
not improve tail latency. This paper introduces
Few-to-Many (FM) incremental parallelization, which
dynamically increases parallelism to reduce tail
latency. FM uses request service demand profiles and
hardware parallelism in an offline phase to compute a
policy, represented as an interval table, which
specifies when and how much software parallelism to
add. At runtime, FM adds parallelism as specified by
the interval table indexed by dynamic system load and
request execution time progress. The longer a request
executes, the more parallelism FM adds. We evaluate FM
in Lucene, an open-source enterprise search engine, and
in Bing, a commercial Web search engine. FM improves
the 99th percentile response time up to 32\% in Lucene
and up to 26\% in Bing, compared to prior
state-of-the-art parallelization. Compared to running
requests sequentially in Bing, FM improves tail latency
by a factor of two. These results illustrate that
incremental parallelism is a powerful tool for reducing
tail latency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Colp:2015:PDS,
author = "Patrick Colp and Jiawen Zhang and James Gleeson and
Sahil Suneja and Eyal de Lara and Himanshu Raj and
Stefan Saroiu and Alec Wolman",
title = "Protecting Data on {Smartphones} and Tablets from
Memory Attacks",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "177--189",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694380",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Smartphones and tablets are easily lost or stolen.
This makes them susceptible to an inexpensive class of
memory attacks, such as cold-boot attacks, using a bus
monitor to observe the memory bus, and DMA attacks.
This paper describes Sentry, a system that allows
applications and OS components to store their code and
data on the System-on-Chip (SoC) rather than in DRAM.
We use ARM-specific mechanisms originally designed for
embedded systems, but still present in today's mobile
devices, to protect applications and OS subsystems from
memory attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Dautenhahn:2015:NKO,
author = "Nathan Dautenhahn and Theodoros Kasampalis and Will
Dietz and John Criswell and Vikram Adve",
title = "Nested Kernel: an Operating System Architecture for
Intra-Kernel Privilege Separation",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "191--206",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694386",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Monolithic operating system designs undermine the
security of computing systems by allowing single
exploits anywhere in the kernel to enjoy full
supervisor privilege. The nested kernel operating
system architecture addresses this problem by
``nesting'' a small isolated kernel within a
traditional monolithic kernel. The ``nested kernel''
interposes on all updates to virtual memory
translations to assert protections on physical memory,
thus significantly reducing the trusted computing base
for memory access control enforcement. We incorporated
the nested kernel architecture into FreeBSD on x86-64
hardware while allowing the entire operating system,
including untrusted components, to operate at the
highest hardware privilege level by write-protecting
MMU translations and de-privileging the untrusted part
of the kernel. Our implementation inherently enforces
kernel code integrity while still allowing dynamically
loaded kernel modules, thus defending against code
injection attacks. We also demonstrate that the nested
kernel architecture allows kernel developers to isolate
memory in ways not possible in monolithic kernels by
introducing write-mediation and write-logging services
to protect critical system data structures. Performance
of the nested kernel prototype shows modest overheads:
$ < 1 \% $ average for Apache and 2.7\% for kernel
compile. Overall, our results and experience show that
the nested kernel design can be retrofitted to existing
monolithic kernels, providing important security
benefits.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Tan:2015:DWS,
author = "Zhangxi Tan and Zhenghao Qian and Xi Chen and Krste
Asanovic and David Patterson",
title = "{DIABLO}: a Warehouse-Scale Computer Network Simulator
using {FPGAs}",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "207--221",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694362",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Motivated by rapid software and hardware innovation in
warehouse-scale computing (WSC), we visit the problem
of warehouse-scale network design evaluation. A WSC is
composed of about 30 arrays or clusters, each of which
contains about 3000 servers, leading to a total of
about 100,000 servers per WSC. We found many prior
experiments have been conducted on relatively small
physical testbeds, and they often assume the workload
is static and that computations are only loosely
coupled with the adaptive networking stack. We present
a novel and cost-efficient FPGA-based evaluation
methodology, called Datacenter-In-A-Box at LOw cost
(DIABLO), which treats arrays as whole computers with
tightly integrated hardware and software. We have built
a 3,000-node prototype running the full WSC software
stack. Using our prototype, we have successfully
reproduced a few WSC phenomena, such as TCP Incast and
the long tail of memcached request latency, and found that
results do indeed change both with scale and with the
version of the full software stack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Hauswald:2015:SOE,
author = "Johann Hauswald and Michael A. Laurenzano and Yunqi
Zhang and Cheng Li and Austin Rovinski and Arjun
Khurana and Ronald G. Dreslinski and Trevor Mudge and
Vinicius Petrucci and Lingjia Tang and Jason Mars",
title = "{Sirius}: an Open End-to-End Voice and Vision Personal
Assistant and Its Implications for Future Warehouse
Scale Computers",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "223--238",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694347",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As user demand scales for intelligent personal
assistants (IPAs) such as Apple's Siri, Google's Google
Now, and Microsoft's Cortana, we are approaching the
computational limits of current datacenter
architectures. It is an open question how future server
architectures should evolve to enable this emerging
class of applications, and the lack of an open-source
IPA workload is an obstacle in addressing this
question. In this paper, we present the design of
Sirius, an open end-to-end IPA web-service application
that accepts queries in the form of voice and images,
and responds with natural language. We then use this
workload to investigate the implications of four points
in the design space of future accelerator-based server
architectures spanning traditional CPUs, GPUs, manycore
throughput co-processors, and FPGAs. To investigate
future server designs for Sirius, we decompose Sirius
into a suite of 7 benchmarks (Sirius Suite) comprising
the computationally intensive bottlenecks of Sirius. We
port Sirius Suite to a spectrum of accelerator
platforms and use the performance and power trade-offs
across these platforms to perform a total cost of
ownership (TCO) analysis of various server design
points. In our study, we find that accelerators are
critical for the future scalability of IPA services.
Our results show that GPU- and FPGA-accelerated servers
improve the query latency on average by 10x and 16x.
For a given throughput, GPU- and FPGA-accelerated
servers can reduce the TCO of datacenters by 2.6x and
1.4x, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Xu:2015:ALD,
author = "Chao Xu and Felix Xiaozhu Lin and Yuyang Wang and Lin
Zhong",
title = "Automated {OS}-level Device Runtime Power Management",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "239--252",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694360",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Non-CPU devices on a modern system-on-a-chip (SoC),
ranging from accelerators to I/O controllers, account
for a significant portion of the chip area. It is
therefore vital for system energy efficiency that idle
devices can enter a low-power state while still meeting
the performance expectation. This is called device
runtime Power Management (PM) for which individual
device drivers in commodity OSes are held responsible
today. Based on the observations of existing drivers
and their evolution, we consider it harmful to rely on
drivers for device runtime PM. This paper identifies
three pieces of information as essential to device
runtime PM, and shows that they can be obtained without
involving drivers, either by using a software-only
approach, or more efficiently, by adding one register
bit to each device. We thus suggest a structural change
to the current Linux runtime PM framework, replacing
the PM code in all applicable drivers with a single
kernel module called the central PM agent. Experimental
evaluations show that the central PM agent is just as
effective as hand-tuned driver PM code. The paper also
presents a tool called PowerAdvisor that simplifies
driver PM efforts under the current Linux runtime PM
framework. PowerAdvisor analyzes execution traces and
suggests where to insert PM calls in driver source
code. Despite being a best-effort tool, PowerAdvisor
not only reproduces hand-tuned PM code from stock
drivers, but also correctly suggests previously unknown
PM code. Overall, our experience shows that it is
promising to ultimately free driver developers from
manual PM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Goiri:2015:CTV,
author = "{\'I}{\~n}igo Goiri and Thu D. Nguyen and Ricardo
Bianchini",
title = "{CoolAir}: Temperature- and Variation-Aware Management
for Free-Cooled Datacenters",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "253--265",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694378",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite its benefits, free cooling may expose servers
to high absolute temperatures, wide temperature
variations, and high humidity when datacenters are
sited at certain locations. Prior research (in
non-free-cooled datacenters) has shown that high
temperatures and/or wide temporal temperature
variations can harm hardware reliability. In this
paper, we identify the runtime management strategies
required to limit absolute temperatures, temperature
variations, humidity, and cooling energy in free-cooled
datacenters. As the basis for our study, we propose
CoolAir, a system that embodies these strategies. Using
CoolAir and a real free-cooled datacenter prototype, we
show that effective management requires cooling
infrastructures that can act smoothly. In addition, we
show that CoolAir can tightly manage temperature and
significantly reduce temperature variation, often at a
lower cooling cost than existing free-cooled
datacenters. Perhaps most importantly, based on our
results, we derive several principles and lessons that
should guide the design of management systems for
free-cooled datacenters of any size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Mishra:2015:PGM,
author = "Nikita Mishra and Huazhe Zhang and John D. Lafferty
and Henry Hoffmann",
title = "A Probabilistic Graphical Model-based Approach for
Minimizing Energy Under Performance Constraints",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "267--281",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694373",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In many deployments, computer systems are
underutilized --- meaning that applications have
performance requirements that demand less than full
system capacity. Ideally, we would take advantage of
this under-utilization by allocating system resources
so that the performance requirements are met and energy
is minimized. This optimization problem is complicated
by the fact that the performance and power consumption
of various system configurations are often application
--- or even input --- dependent. Thus, practically,
minimizing energy for a performance constraint requires
fast, accurate estimations of application-dependent
performance and power tradeoffs. This paper
investigates machine learning techniques that enable
energy savings by learning Pareto-optimal power and
performance tradeoffs. Specifically, we propose LEO, a
probabilistic graphical model-based learning system
that provides accurate online estimates of an
application's power and performance as a function of
system configuration. We compare LEO to (1) offline
learning, (2) online learning, (3) a heuristic
approach, and (4) the true optimal solution. We find
that LEO produces the most accurate estimates and near
optimal energy savings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Pang:2015:MLL,
author = "Jun Pang and Chris Dwyer and Alvin R. Lebeck",
title = "More is Less, Less is More: Molecular-Scale Photonic
{NoC} Power Topologies",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "283--296",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694377",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Molecular-scale Network-on-Chip (mNoC) crossbars use
quantum dot LEDs as an on-chip light source, and
chromophores to provide optical signal filtering for
receivers. An mNoC reduces power consumption or enables
scaling to larger crossbars for a reduced energy budget
compared to current nanophotonic NoC crossbars. Since
communication latency is reduced by using a high-radix
crossbar, minimizing power consumption becomes a
primary design target. Conventional Single Writer
Multiple Reader (SWMR) photonic crossbar designs
broadcast all packets, and incur the commensurate
required power, even if only two nodes are
communicating. This paper introduces power topologies,
enabled by unique capabilities of mNoC technology, to
reduce overall interconnect power consumption. A power
topology corresponds to the logical connectivity
provided by a given power mode. Broadcast is one power
mode and it consumes the maximum power. Additional
power modes consume less power but allow a source to
communicate with only a statically defined, potentially
non-contiguous, subset of nodes. Overall interconnect
power is reduced if the more frequently communicating
nodes use modes that consume less power, while less
frequently communicating nodes use modes that consume
more power. We also investigate thread mapping
techniques to fully exploit power topologies. We
explore various mNoC power topologies with one, two and
four power modes for a radix-256 SWMR mNoC crossbar.
Our results show that the combination of power
topologies and intelligent thread mapping can reduce
total mNoC power by up to 51\% on average for a set of
12 SPLASH benchmarks. Furthermore, performance is 10\%
better than conventional resonator-based photonic NoCs
and energy is reduced by 72\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Sridharan:2015:MEM,
author = "Vilas Sridharan and Nathan DeBardeleben and Sean
Blanchard and Kurt B. Ferreira and Jon Stearley and
John Shalf and Sudhanva Gurumurthi",
title = "Memory Errors in Modern Systems: The Good, The Bad,
and The Ugly",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "297--310",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694348",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Several recent publications have shown that hardware
faults in the memory subsystem are commonplace. These
faults are predicted to become more frequent in future
systems that contain orders of magnitude more DRAM and
SRAM than found in current memory subsystems. These
memory subsystems will need to provide resilience
techniques to tolerate these faults when deployed in
high-performance computing systems and data centers
containing tens of thousands of nodes. Therefore, it is
critical to understand the efficacy of current hardware
resilience techniques to determine whether they will be
suitable for future systems. In this paper, we present
a study of DRAM and SRAM faults and errors from the
field. We use data from two leadership-class
high-performance computer systems to analyze the
reliability impact of hardware resilience schemes that
are deployed in current systems. Our study has several
key findings about the efficacy of many currently
deployed reliability techniques such as DRAM ECC, DDR
address/command parity, and SRAM ECC and parity. We
also perform a methodological study, and find that
counting errors instead of faults, a common practice
among researchers and data center operators, can lead
to incorrect conclusions about system reliability.
Finally, we use our data to project the needs of future
large-scale systems. We find that SRAM faults are
unlikely to pose a significantly larger reliability
threat in the future, while DRAM faults will be a major
concern and stronger DRAM resilience schemes will be
needed to maintain acceptable failure rates similar to
those found on today's systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Yetim:2015:CMC,
author = "Yavuz Yetim and Sharad Malik and Margaret Martonosi",
title = "{CommGuard}: Mitigating Communication Errors in
Error-Prone Parallel Execution",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "311--323",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694354",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As semiconductor technology scales towards
ever-smaller transistor sizes, hardware fault rates are
increasing. Since important application classes (e.g.,
multimedia, streaming workloads) are
data-error-tolerant, recent research has proposed
techniques that seek to save energy or improve yield by
exploiting error tolerance at the
architecture/microarchitecture level. Even seemingly
error-tolerant applications, however, will crash or
hang due to control-flow/memory addressing errors. In
parallel computation, errors involving inter-thread
communication can have equally catastrophic effects.
Our work explores techniques that mitigate the impact
of potentially catastrophic errors in parallel
computation, while still garnering power, cost, or
yield benefits from data error tolerance. Our proposed
CommGuard solution uses FSM-based checkers to pad and
discard data in order to maintain semantic alignment
between program control flow and the data communicated
between processors. CommGuard techniques are low
overhead and they exploit application information
already provided by some parallel programming languages
(e.g., StreamIt). By converting potentially catastrophic
communication errors into potentially tolerable data
errors, CommGuard allows important streaming
applications like JPEG and MP3 decoding to execute
without crashing and to sustain good output quality,
even for errors as frequent as every 500 $\mu$s.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
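%%% A minimal sketch of the padding/discarding idea the CommGuard
%%% abstract describes, assuming a hypothetical frame-based channel: the
%%% checker knows from the streaming program how many items the consumer
%%% expects per frame, so a corrupted item count is repaired into a
%%% tolerable data error instead of desynchronizing the consumer.
%%%
%%%   ITEMS_PER_FRAME = 4   # known statically, e.g. from a StreamIt rate
%%%
%%%   def guard(frames):
%%%       for frame in frames:             # possibly corrupted frames
%%%           fixed = frame[:ITEMS_PER_FRAME]                # drop extras
%%%           fixed += [0] * (ITEMS_PER_FRAME - len(fixed))  # pad losses
%%%           yield fixed
%%%
%%%   for f in guard([[1, 2, 3, 4], [5, 6], [7, 8, 9, 10, 11]]):
%%%       print(f)   # every frame reaches the consumer with 4 items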
@Article{Kim:2015:DEF,
author = "Dohyeong Kim and Yonghwi Kwon and William N. Sumner
and Xiangyu Zhang and Dongyan Xu",
title = "Dual Execution for On-the-Fly Fine Grained Execution
Comparison",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "325--338",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694394",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Execution comparison has many applications in
debugging, malware analysis, software feature
identification, and intrusion detection. Existing
comparison techniques have various limitations. Some
can only compare at the system event level and require
executions to take the same input. Some require storing
instruction traces that are very space-consuming and
have difficulty dealing with non-determinism. In this
paper, we propose a novel dual execution technique that
allows on-the-fly comparison at the instruction level.
Only differences between the executions are recorded.
It allows executions to proceed in a coupled mode such
that they share the same input sequence with the same
timing, reducing nondeterminism. It also allows them to
proceed in a decoupled mode such that the user can
interact with each one differently. Decoupled
executions can be recoupled to share the same future
inputs and facilitate further comparison. We have
implemented a prototype and applied it to identifying
functional components for reuse, comparative debugging
with new GDB primitives, and understanding real world
regression failures. Our results show that dual
execution is a critical enabling technique for
execution comparison.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Hosek:2015:VUE,
author = "Petr Hosek and Cristian Cadar",
title = "{VARAN} the Unbelievable: an Efficient {$N$}-version
Execution Framework",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "339--353",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694390",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the widespread availability of multi-core
processors, running multiple diversified variants or
several different versions of an application in
parallel is becoming a viable approach for increasing
the reliability and security of software systems. The
key component of such N-version execution (NVX) systems
is a runtime monitor that enables the execution of
multiple versions in parallel. Unfortunately, existing
monitors impose either a large performance overhead or
rely on intrusive kernel-level changes. Moreover, none
of the existing solutions scales well with the number
of versions, since the runtime monitor acts as a
performance bottleneck. In this paper, we introduce
Varan, an NVX framework that combines selective binary
rewriting with a novel event-streaming architecture to
significantly reduce performance overhead and scale
well with the number of versions, without relying on
intrusive kernel modifications. Our evaluation shows
that Varan can run NVX systems based on popular C10k
network servers with only a modest performance
overhead, and can be effectively used to increase
software reliability using techniques such as
transparent failover, live sanitization and
multi-revision execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
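%%% A minimal sketch of the event-streaming monitor structure the Varan
%%% abstract describes (illustrative Python, not Varan's mechanism or
%%% API): a leader version performs the externally visible operations and
%%% publishes their results to a stream; follower versions replay the
%%% stream instead of re-executing them, which is what lets such a
%%% monitor scale with the number of versions.
%%%
%%%   import queue, threading
%%%
%%%   stream = queue.Queue()
%%%
%%%   def leader():
%%%       for result in ["GET /", "200 OK"]:   # stands in for real I/O
%%%           stream.put(result)
%%%       stream.put(None)                     # end-of-stream marker
%%%
%%%   def follower():
%%%       while (event := stream.get()) is not None:
%%%           # a real NVX monitor would compare the follower's attempted
%%%           # operation against the replayed event and flag divergence
%%%           print("replayed:", event)
%%%
%%%   t = threading.Thread(target=leader)
%%%   t.start()
%%%   follower()
%%%   t.join()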
@Article{Malka:2015:REI,
author = "Moshe Malka and Nadav Amit and Muli Ben-Yehuda and Dan
Tsafrir",
title = "{rIOMMU}: Efficient {IOMMU} for {I/O} Devices that
Employ Ring Buffers",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "355--368",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694355",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The IOMMU allows the OS to encapsulate I/O devices in
their own virtual memory spaces, thus restricting their
DMAs to specific memory pages. The OS uses the IOMMU to
protect itself against buggy drivers and
malicious/errant devices. But the added protection
comes at a cost, degrading the throughput of
I/O-intensive workloads by up to an order of magnitude.
This cost has motivated system designers to trade off
some safety for performance, e.g., by leaving stale
information in the IOTLB for a while so as to amortize
costly invalidations. We observe that high-bandwidth
devices---like network and PCIe SSD
controllers---interact with the OS via circular ring
buffers that induce a sequential, predictable workload.
We design a ring IOMMU (rIOMMU) that leverages this
characteristic by replacing the virtual memory page
table hierarchy with a circular, flat table. A flat
table is adequately supported by exactly one IOTLB
entry, making every new translation an implicit
invalidation of the former and thus requiring explicit
invalidations only at the end of I/O bursts. Using
standard networking benchmarks, we show that rIOMMU
provides up to 7.56x higher throughput relative to the
baseline IOMMU, and that it is within 0.77--1.00x the
throughput of a system without IOMMU protection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
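%%% A minimal sketch of the flat circular translation table the rIOMMU
%%% abstract describes (an illustrative Python model, not the hardware
%%% design): mapping the next ring slot overwrites, and thus implicitly
%%% invalidates, the translation installed one ring revolution earlier,
%%% so explicit invalidation is needed only at the end of an I/O burst.
%%%
%%%   class RingIOMMU:
%%%       def __init__(self, ring_size):
%%%           self.table = [None] * ring_size  # one slot per ring entry
%%%           self.head = 0
%%%
%%%       def map_next(self, phys_page):
%%%           slot = self.head
%%%           self.table[slot] = phys_page     # implicit invalidation of
%%%           self.head = (slot + 1) % len(self.table)  # the old mapping
%%%           return slot
%%%
%%%       def translate(self, slot):
%%%           if self.table[slot] is None:
%%%               raise PermissionError("DMA to unmapped ring slot")
%%%           return self.table[slot]
%%%
%%%       def invalidate_all(self):            # explicit, end of burst
%%%           self.table = [None] * len(self.table)
%%%
%%%   iommu = RingIOMMU(4)
%%%   slot = iommu.map_next(0xCAFE0)
%%%   print(hex(iommu.translate(slot)))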
@Article{Liu:2015:PPM,
author = "Daofu Liu and Tianshi Chen and Shaoli Liu and Jinhong
Zhou and Shengyuan Zhou and Olivier Temam and Xiaobing
Feng and Xuehai Zhou and Yunji Chen",
title = "{PuDianNao}: a Polyvalent Machine Learning
Accelerator",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "369--381",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694358",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Machine Learning (ML) techniques are pervasive tools
in various emerging commercial applications, but have
to be accommodated by powerful computer systems to
process very large data. Although general-purpose CPUs
and GPUs have provided straightforward solutions, their
energy-efficiencies are limited due to their excessive
support for flexibility. Hardware accelerators may
achieve better energy-efficiencies, but each
accelerator often accommodates only a single ML
technique (family). According to the famous
No-Free-Lunch theorem in the ML domain, however, an ML
technique that performs well on one dataset may perform
poorly on another, which implies that such an
accelerator may sometimes yield poor learning accuracy.
Even leaving learning accuracy aside, such an
accelerator can still become inapplicable simply
because the concrete ML task is altered, or the user
chooses another ML technique. In this study, we present
an ML accelerator called PuDianNao, which accommodates
seven representative ML techniques, including k-means,
k-nearest neighbors, naive Bayes, support vector
machine, linear regression, classification tree, and
deep neural network. Benefiting from our thorough
analysis of the computational primitives and locality
properties of different ML techniques, PuDianNao can
perform up to 1056 GOP/s (e.g., additions and
multiplications) in an area of 3.51 mm$^2$, and
consumes only 596 mW. Compared with the NVIDIA K20M GPU (28nm
process), PuDianNao (65nm process) is 1.20x faster, and
can reduce the energy by 128.41x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Goiri:2015:ABA,
author = "Inigo Goiri and Ricardo Bianchini and Santosh
Nagarakatte and Thu D. Nguyen",
title = "{ApproxHadoop}: Bringing Approximations to {MapReduce}
Frameworks",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "383--397",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694351",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose and evaluate a framework for creating and
running approximation-enabled MapReduce programs.
Specifically, we propose approximation mechanisms that
fit naturally into the MapReduce paradigm, including
input data sampling, task dropping, and accepting and
running a precise and a user-defined approximate
version of the MapReduce code. We then show how to
leverage statistical theories to compute error bounds
for popular classes of MapReduce programs when
approximating with input data sampling and/or task
dropping. We implement the proposed mechanisms and
error bound estimations in a prototype system called
ApproxHadoop. Our evaluation uses MapReduce
applications from different domains, including data
analytics, scientific computing, video encoding, and
machine learning. Our results show that ApproxHadoop
can significantly reduce application execution time
and/or energy consumption when the user is willing to
tolerate small errors. For example, ApproxHadoop can
reduce runtimes by up to 32x when the user can tolerate
an error of 1\% with 95\% confidence. We conclude that
our framework and system can make approximation easily
accessible to many application domains using the
MapReduce model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
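%%% The error bounds the ApproxHadoop abstract mentions follow from
%%% classical sampling statistics. A minimal sketch, with illustrative
%%% names and ignoring the finite-population correction: extrapolate an
%%% aggregate from a random sample of the input and attach a ~95%
%%% confidence interval.
%%%
%%%   import random, statistics
%%%
%%%   population = [random.randint(0, 100) for _ in range(100_000)]
%%%   n = 2_000                            # inputs actually processed
%%%   sample = random.sample(population, n)
%%%
%%%   mean = statistics.fmean(sample)
%%%   stderr = statistics.stdev(sample) / n ** 0.5
%%%   N = len(population)
%%%
%%%   estimate = N * mean                  # extrapolated sum
%%%   margin = 1.96 * N * stderr           # ~95% confidence half-width
%%%
%%%   print(f"sum ~ {estimate:.0f} +/- {margin:.0f}; "
%%%         f"true {sum(population)}")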
@Article{Ringenburg:2015:MDQ,
author = "Michael Ringenburg and Adrian Sampson and Isaac
Ackerman and Luis Ceze and Dan Grossman",
title = "Monitoring and Debugging the Quality of Results in
Approximate Programs",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "399--411",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694365",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy efficiency is a key concern in the design of
modern computer systems. One promising approach to
energy-efficient computation, approximate computing,
trades off output accuracy for significant gains in
energy efficiency. However, debugging the actual cause
of output quality problems in approximate programs is
challenging. This paper presents dynamic techniques to
debug and monitor the quality of approximate
computations. We propose both offline debugging tools
that instrument code to determine the key sources of
output degradation and online approaches that monitor
the quality of deployed applications. We present two
offline debugging techniques and three online
monitoring mechanisms. The first offline tool
identifies correlations between output quality and the
execution of individual approximate operations. The
second tracks approximate operations that flow into a
particular value. Our online monitoring mechanisms are
complementary approaches designed for detecting quality
problems in deployed applications, while still
maintaining the energy savings from approximation. We
present implementations of our techniques and describe
their usage with seven applications. Our online
monitors control output quality while still maintaining
significant energy efficiency gains, and our offline
tools provide new insights into the effects of
approximation on output quality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Banavar:2015:WEC,
author = "Guruduth Banavar",
title = "{Watson} and the Era of Cognitive Computing",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "413--413",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694376",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the last decade, the availability of massive
amounts of new data, and the development of new machine
learning technologies, have augmented reasoning systems
to give rise to a new class of computing systems. These
``Cognitive Systems'' learn from data, reason from
models, and interact naturally with us, to perform
complex tasks better than either humans or machines can
do by themselves. In essence, cognitive systems help us
perform like the best by penetrating the complexity of
big data and leveraging the power of models. One of the
first cognitive systems, called Watson, demonstrated
through a Jeopardy! exhibition match that it was
capable of answering complex factoid questions as
effectively as the world's champions. Follow-on
cognitive systems perform other tasks, such as
discovery, reasoning, and multi-modal understanding in
a variety of domains, such as healthcare, insurance,
and education. We believe such cognitive systems will
transform every industry and our everyday life for the
better. In this talk, I will give an overview of the
applications, the underlying capabilities, and some of
the key challenges, of cognitive systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Stewart:2015:ZDW,
author = "Gordon Stewart and Mahanth Gowda and Geoffrey Mainland
and Bozidar Radunovic and Dimitrios Vytiniotis and
Cristina Luengo Agullo",
title = "{Ziria}: a {DSL} for Wireless Systems Programming",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "415--428",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694368",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software-defined radio (SDR) brings the flexibility of
software to wireless protocol design, promising an
ideal platform for innovation and rapid protocol
deployment. However, implementing modern wireless
protocols on existing SDR platforms often requires
careful hand-tuning of low-level code, which can
undermine the advantages of software. Ziria is a new
domain-specific language (DSL) that offers programming
abstractions suitable for wireless physical (PHY) layer
tasks while emphasizing the pipeline reconfiguration
aspects of PHY programming. The Ziria compiler
implements a rich set of specialized optimizations,
such as lookup table generation and pipeline fusion. We
also offer a novel --- due to pipeline reconfiguration
--- algorithm to optimize the data widths of
computations in Ziria pipelines. We demonstrate the
programming flexibility of Ziria and the performance of
the generated code through a detailed evaluation of a
line-rate Ziria WiFi 802.11a/g implementation that is
on par with, and in many cases outperforms, a hand-tuned
state-of-the-art C++ implementation on commodity
CPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Mullapudi:2015:PAO,
author = "Ravi Teja Mullapudi and Vinay Vasista and Uday
Bondhugula",
title = "{PolyMage}: Automatic Optimization for Image
Processing Pipelines",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "429--443",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694364",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents the design and implementation of
PolyMage, a domain-specific language and compiler for
image processing pipelines. An image processing
pipeline can be viewed as a graph of interconnected
stages which process images successively. Each stage
typically performs one of point-wise, stencil,
reduction or data-dependent operations on image pixels.
Individual stages in a pipeline typically exhibit
abundant data parallelism that can be exploited with
relative ease. However, the stages also require high
memory bandwidth preventing effective utilization of
parallelism available on modern architectures. For
applications that demand high performance, the
traditional options are to use optimized libraries like
OpenCV or to optimize manually. While using libraries
precludes optimization across library routines, manual
optimization accounting for both parallelism and
locality is very tedious. The focus of our system,
PolyMage, is on automatically generating
high-performance implementations of image processing
pipelines expressed in a high-level declarative
language. Our optimization approach primarily relies on
the transformation and code generation capabilities of
the polyhedral compiler framework. To the best of our
knowledge, this is the first model-driven compiler for
image processing pipelines that performs complex
fusion, tiling, and storage optimization automatically.
Experimental results on a modern multicore system show
that the performance achieved by our automatic approach
is up to 1.81x better than that achieved through manual
tuning in Halide, a state-of-the-art language and
compiler for image processing pipelines. For a camera
raw image processing pipeline, our performance is
comparable to that of a hand-tuned implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Heckey:2015:CMC,
author = "Jeff Heckey and Shruti Patil and Ali JavadiAbhari and
Adam Holmes and Daniel Kudrow and Kenneth R. Brown and
Diana Franklin and Frederic T. Chong and Margaret
Martonosi",
title = "Compiler Management of Communication and Parallelism
for Quantum Computation",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "445--456",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694357",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Quantum computing (QC) offers huge promise to
accelerate a range of computationally intensive
benchmarks. Quantum computing is limited, however, by
the challenges of decoherence: i.e., a quantum state
can only be maintained for short windows of time before
it decoheres. While quantum error correction codes can
protect against decoherence, fast execution time is the
best defense against decoherence, so efficient
architectures and effective scheduling algorithms are
necessary. This paper proposes the Multi-SIMD QC
architecture and then proposes and evaluates effective
schedulers to map benchmark descriptions onto
Multi-SIMD architectures. The Multi-SIMD model consists
of a small number of SIMD regions, each of which may
support operations on up to thousands of qubits per
cycle. Efficient Multi-SIMD operation requires
efficient scheduling. This work develops schedulers to
reduce communication requirements of qubits between
operating regions, while also improving parallelism. We
find that communication to global memory is a dominant
cost in QC. We also note that many quantum benchmarks
have long serial operation paths (although each
operation may be data parallel). To exploit this
characteristic, we introduce Longest-Path-First
Scheduling (LPFS) which pins operations to SIMD regions
to keep data in-place and reduce communication to
memory. The use of small, local scratchpad memories
also further reduces communication. Our results show a
3\% to 308\% improvement for LPFS over conventional
scheduling algorithms, and an additional 3\% to 64\%
improvement using scratchpad memories. Our work is the
most comprehensive software-to-quantum toolflow
published to date, with efficient and practical
scheduling techniques that reduce communication and
increase parallelism for full-scale quantum code
executing up to a trillion quantum gate operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
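%%% A minimal sketch of the Longest-Path-First selection rule named in
%%% the abstract above (illustrative Python on a toy dependence graph,
%%% not the paper's scheduler): among ready operations, prefer the one
%%% that heads the longest remaining dependence chain, since that chain
%%% lower-bounds total execution time.
%%%
%%%   succ = {"h": ["cx1"], "cx1": ["cx2"], "cx2": [], "x": []}
%%%
%%%   memo = {}
%%%   def path_len(op):              # longest path from op to a sink
%%%       if op not in memo:
%%%           memo[op] = 1 + max((path_len(s) for s in succ[op]),
%%%                              default=0)
%%%       return memo[op]
%%%
%%%   ready = ["x", "h"]
%%%   ready.sort(key=path_len, reverse=True)
%%%   print(ready)   # 'h' first: it heads the 3-operation serial chain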
@Article{Hassaan:2015:KDG,
author = "Muhammad Amber Hassaan and Donald D. Nguyen and Keshav
K. Pingali",
title = "Kinetic Dependence Graphs",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "457--471",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694363",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Task graphs or dependence graphs are used in runtime
systems to schedule tasks for parallel execution. In
problem domains such as dense linear algebra and signal
processing, dependence graphs can be generated from a
program by static analysis. However, in emerging
problem domains such as graph analytics, the set of
tasks and dependences between tasks in a program are
complex functions of runtime values and cannot be
determined statically. In this paper, we introduce a
novel approach for exploiting parallelism in such
programs. This approach is based on a data structure
called the kinetic dependence graph (KDG), which
consists of a dependence graph together with update
rules that incrementally update the graph to reflect
changes in the dependence structure whenever a task is
completed. We have implemented a simple programming
model that allows programmers to write these
applications at a high level of abstraction, and a
runtime within the Galois system [15] that builds the
KDG automatically and executes the program in parallel.
On a suite of programs that are difficult to
parallelize otherwise, we have obtained speedups of up
to 33 on 40 cores, outperforming third-party
implementations in many cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
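%%% The KDG described above pairs a dependence graph with update rules
%%% that run at task completion. A minimal sketch of that skeleton
%%% (illustrative Python; real KDG update rules can also rewire edges
%%% using runtime values, which is elided here --- completion only
%%% retires edges, as in Kahn-style scheduling):
%%%
%%%   from collections import deque
%%%
%%%   deps = {"a": set(), "b": {"a"}, "c": {"a"}, "d": {"b", "c"}}
%%%   dependents = {t: {u for u, ds in deps.items() if t in ds}
%%%                 for t in deps}
%%%
%%%   ready = deque(t for t, ds in deps.items() if not ds)
%%%   while ready:
%%%       task = ready.popleft()     # in Galois, sources run in parallel
%%%       print("run", task)
%%%       for u in dependents[task]: # update rule at completion:
%%%           deps[u].discard(task)  # retire edges out of 'task'
%%%           if not deps[u]:
%%%               ready.append(u)    # 'u' became a new source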
@Article{Sidiroglou-Douskos:2015:TAI,
author = "Stelios Sidiroglou-Douskos and Eric Lahtinen and
Nathan Rittenhouse and Paolo Piselli and Fan Long and
Deokhwan Kim and Martin Rinard",
title = "Targeted Automatic Integer Overflow Discovery Using
Goal-Directed Conditional Branch Enforcement",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "473--486",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694389",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new technique and system, DIODE, for
automatically generating inputs that trigger
overflows at memory allocation sites. DIODE is designed
to identify relevant sanity checks that inputs must
satisfy to trigger overflows at target memory
allocation sites, then generate inputs that satisfy
these sanity checks to successfully trigger the
overflow. DIODE works with off-the-shelf, production
x86 binaries. Our results show that, for our benchmark
set of applications, and for every target memory
allocation site exercised by our seed inputs (which the
applications process correctly with no overflows),
either (1) DIODE is able to generate an input that
triggers an overflow at that site or (2) there is no
input that would trigger an overflow for the observed
target expression at that site.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Dhawan:2015:ASS,
author = "Udit Dhawan and Catalin Hritcu and Raphael Rubin and
Nikos Vasilakis and Silviu Chiricescu and Jonathan M.
Smith and Thomas F. {Knight, Jr.} and Benjamin C.
Pierce and Andre DeHon",
title = "Architectural Support for Software-Defined Metadata
Processing",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "487--502",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694383",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Optimized hardware for propagating and checking
software-programmable metadata tags can achieve low
runtime overhead. We generalize prior work on hardware
tagging by considering a generic architecture that
supports software-defined policies over metadata of
arbitrary size and complexity; we introduce several
novel microarchitectural optimizations that keep the
overhead of this rich processing low. Our model thus
achieves the efficiency of previous hardware-based
approaches with the flexibility of the software-based
ones. We demonstrate this by using it to enforce four
diverse safety and security policies---spatial and
temporal memory safety, taint tracking, control-flow
integrity, and code and data separation---plus a
composite policy that enforces all of them
simultaneously. Experiments on SPEC CPU2006 benchmarks
with a PUMP-enhanced RISC processor show modest impact
on runtime (typically under 10\%) and power ceiling
(less than 10\%), in return for some increase in energy
usage (typically under 60\%) and area for on-chip
memory structures (110\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Zhang:2015:HDL,
author = "Danfeng Zhang and Yao Wang and G. Edward Suh and
Andrew C. Myers",
title = "A Hardware Design Language for Timing-Sensitive
Information-Flow Security",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "503--516",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694372",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Information security can be compromised by leakage via
low-level hardware features. One recently prominent
example is cache probing attacks, which rely on timing
channels created by caches. We introduce a hardware
design language, SecVerilog, which makes it possible to
statically analyze information flow at the hardware
level. With SecVerilog, systems can be built with
verifiable control of timing channels and other
information channels. SecVerilog is Verilog, extended
with expressive type annotations that enable precise
reasoning about information flow. It also comes with
rigorous formal assurance: we prove that SecVerilog
enforces timing-sensitive noninterference and thus
ensures secure information flow. By building a secure
MIPS processor and its caches, we demonstrate that
SecVerilog makes it possible to build complex hardware
designs with verified security, yet with low overhead
in time, space, and HW designer effort.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Hicks:2015:SLR,
author = "Matthew Hicks and Cynthia Sturton and Samuel T. King
and Jonathan M. Smith",
title = "{SPECS}: a Lightweight Runtime Mechanism for
Protecting Software from Security-Critical Processor
Bugs",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "517--529",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694366",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Processor implementation errata remain a problem, and
worse, a subset of these bugs are security-critical. We
classified 7 years of errata from recent commercial
processors to understand the magnitude and severity of
this problem, and found that of 301 errata analyzed, 28
are security-critical. We propose the SECURITY-CRITICAL
PROCESSOR ERRATA CATCHING SYSTEM (SPECS) as a
low-overhead solution to this problem. SPECS employs a
dynamic verification strategy that is made lightweight
by limiting protection to only security-critical
processor state. As a proof-of-concept, we implement a
hardware prototype of SPECS in an open source
processor. Using this prototype, we evaluate SPECS
against a set of 14 bugs inspired by the types of
security-critical errata we discovered in the
classification phase. The evaluation shows that SPECS
is 86\% effective as a defense when deployed using only
ISA-level state; incurs less than 5\% area and power
overhead; and has no software run-time overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Duan:2015:AMF,
author = "Yuelu Duan and Nima Honarmand and Josep Torrellas",
title = "Asymmetric Memory Fences: Optimizing Both Performance
and Implementability",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "531--543",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694388",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There have been several recent efforts to improve the
performance of fences. The most aggressive designs
allow post-fence accesses to retire and complete before
the fence completes. Unfortunately, such designs
present implementation difficulties due to their
reliance on global state and structures. This paper's
goal is to optimize both the performance and the
implementability of fences. We start off with a design
like the most aggressive ones but without the global
state. We call it Weak Fence or wF. Since the
concurrent execution of multiple wFs can deadlock, we
combine wFs with a conventional fence (i.e., Strong
Fence or sF) for the less performance-critical
thread(s). We call the result an Asymmetric fence
group. We also propose a taxonomy of Asymmetric fence
groups under TSO. Compared to past aggressive fences,
Asymmetric fence groups both are substantially easier
to implement and have higher average performance. The
two main designs presented (WS+ and W+) speed-up
workloads under TSO by an average of 13\% and 21\%,
respectively, over conventional fences.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Sung:2015:DES,
author = "Hyojin Sung and Sarita V. Adve",
title = "{DeNovoSync}: Efficient Support for Arbitrary
Synchronization without Writer-Initiated
Invalidations",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "545--559",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694356",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Current shared-memory hardware is complex and
inefficient. Prior work on the DeNovo coherence
protocol showed that disciplined shared-memory
programming models can enable more complexity-,
performance-, and energy-efficient hardware than the
state-of-the-art MESI protocol. DeNovo, however,
severely restricted the synchronization constructs an
application can support. This paper proposes
DeNovoSync, a technique to support arbitrary
synchronization in DeNovo. The key challenge is that
DeNovo exploits race-freedom to use reader-initiated
local self-invalidations (instead of conventional
writer-initiated remote cache invalidations) to ensure
coherence. Synchronization accesses are inherently racy
and not directly amenable to self-invalidations.
DeNovoSync addresses this challenge using a novel
combination of registration of all synchronization
reads with a judicious hardware backoff to limit
unnecessary registrations. For a wide variety of
synchronization constructs and applications, compared
to MESI, DeNovoSync shows comparable or up to 22\%
lower execution time and up to 58\% lower network
traffic, enabling DeNovo's advantages for a much
broader class of software than previously possible.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Sengupta:2015:HSD,
author = "Aritra Sengupta and Swarnendu Biswas and Minjia Zhang
and Michael D. Bond and Milind Kulkarni",
title = "Hybrid Static-Dynamic Analysis for Statically Bounded
Region Serializability",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "561--575",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694379",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data races are common. They are difficult to detect,
avoid, or eliminate, and programmers sometimes
introduce them intentionally. However, shared-memory
programs with data races have unexpected, erroneous
behaviors. Intentional and unintentional data races
lead to atomicity and sequential consistency (SC)
violations, and they make it more difficult to
understand, test, and verify software. Existing
approaches for providing stronger guarantees for racy
executions add high run-time overhead and/or rely on
custom hardware. This paper shows how to provide
stronger semantics for racy programs while providing
relatively good performance on commodity systems. A
novel hybrid static--dynamic analysis called
\emph{EnfoRSer} provides end-to-end support for a
memory model called \emph{statically bounded region
serializability} (SBRS) that is not only stronger than
weak memory models but strictly stronger than SC.
EnfoRSer uses static compiler analysis to transform
regions, and dynamic analysis to detect and resolve
conflicts at run time. By demonstrating commodity
support for a reasonably strong memory model with
reasonable overheads, we show its potential as an
always-on execution model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Alglave:2015:GCW,
author = "Jade Alglave and Mark Batty and Alastair F. Donaldson
and Ganesh Gopalakrishnan and Jeroen Ketema and Daniel
Poetzl and Tyler Sorensen and John Wickerson",
title = "{GPU} Concurrency: Weak Behaviours and Programming
Assumptions",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "577--591",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694391",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrency is pervasive and perplexing, particularly
on graphics processing units (GPUs). Current
specifications of languages and hardware are
inconclusive; thus programmers often rely on folklore
assumptions when writing software. To remedy this state
of affairs, we conducted a large empirical study of the
concurrent behaviour of deployed GPUs. Armed with
litmus tests (i.e., short concurrent programs), we
questioned the assumptions in programming guides and
vendor documentation about the guarantees provided by
hardware. We developed a tool to generate thousands of
litmus tests and run them under stressful workloads. We
observed a litany of previously elusive weak
behaviours, and exposed folklore beliefs about GPU
programming---often supported by official
tutorials---as false. As a way forward, we propose a
model of Nvidia GPU hardware, which correctly models
every behaviour witnessed in our experiments. The model
is a variant of SPARC Relaxed Memory Order (RMO),
structured following the GPU concurrency hierarchy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Park:2015:CCP,
author = "Jason Jong Kyu Park and Yongjun Park and Scott
Mahlke",
title = "{Chimera}: Collaborative Preemption for Multitasking
on a Shared {GPU}",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "593--606",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694346",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The demand for multitasking on graphics processing
units (GPUs) is constantly increasing as they have
become one of the default components on modern computer
systems along with traditional processors (CPUs).
Preemptive multitasking on CPUs has been primarily
supported through context switching. However, the same
preemption strategy incurs substantial overhead due to
the large context in GPUs. The overhead comes in two
dimensions: a preempting kernel suffers from a long
preemption latency, and the system throughput is wasted
during the switch. Without precise control over the
large preemption overhead, multitasking on GPUs has
little use for applications with strict latency
requirements. In this paper, we propose Chimera, a
collaborative preemption approach that can precisely
control the overhead for multitasking on GPUs. Chimera
first introduces streaming multiprocessor (SM)
flushing, which can instantly preempt an SM by
detecting and exploiting idempotent execution. Chimera
utilizes flushing collaboratively with two previously
proposed preemption techniques for GPUs, namely context
switching and draining to minimize throughput overhead
while achieving a required preemption latency.
Evaluations show that Chimera violates the deadline for
only 0.2\% of preemption requests when a 15 $\mu$s
preemption latency constraint is used. For
multi-programmed workloads, Chimera can improve the
average normalized turnaround time by 5.5x, and system
throughput by 12.2\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Agarwal:2015:PPS,
author = "Neha Agarwal and David Nellans and Mark Stephenson and
Mike O'Connor and Stephen W. Keckler",
title = "Page Placement Strategies for {GPUs} within
Heterogeneous Memory Systems",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "607--618",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694381",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Systems from smartphones to supercomputers are
increasingly heterogeneous, being composed of both CPUs
and GPUs. To maximize cost and energy efficiency, these
systems will increasingly use globally-addressable
heterogeneous memory systems, making choices about
memory page placement critical to performance. In this
work we show that current page placement policies are
not sufficient to maximize GPU performance in these
heterogeneous memory systems. We propose two new page
placement policies that improve GPU performance: one
application agnostic and one using application profile
information. Our application agnostic policy,
bandwidth-aware (BW-AWARE) placement, maximizes GPU
throughput by balancing page placement across the
memories based on the aggregate memory bandwidth
available in a system. Our simulation-based results
show that BW-AWARE placement outperforms the existing
Linux INTERLEAVE and LOCAL policies by 35\% and 18\% on
average for GPU compute workloads. We build upon
BW-AWARE placement by developing a compiler-based
profiling mechanism that provides programmers with
information about GPU application data structure access
patterns. Combining this information with simple
program-annotated hints about memory placement, our
hint-based page placement approach performs within 90\%
of oracular page placement on average, largely
mitigating the need for costly dynamic page tracking
and migration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Zhao:2015:FPS,
author = "Zhijia Zhao and Xipeng Shen",
title = "On-the-Fly Principled Speculation for {FSM}
Parallelization",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "619--630",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694369",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Finite State Machine (FSM) is the backbone of an
important class of applications in many domains. Its
parallelization has been extremely difficult due to
inherent strong dependences in the computation.
Recently, principled speculation shows good promise to
solve the problem. However, the reliance on offline
training makes the approach inconvenient to adopt and
hard to apply to many practical FSM applications, which
often deal with a large variety of inputs different
from training inputs. This work presents an assembly of
techniques that completely remove the need for offline
training. The techniques include a set of theoretical
results on inherent properties of FSMs, and two newly
designed dynamic optimizations for efficient FSM
characterization. The new techniques, for the first
time, make principled speculation applicable on the fly,
and enable swift, automatic configuration of
speculative parallelizations to best suit a given FSM
and its current input. They eliminate the fundamental
barrier to practical adoption of principled speculation
for FSM parallelization. Experiments show that the new
techniques give significantly higher speedups for some
difficult FSM applications in the presence of input
changes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
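%%% A minimal sketch of the general decomposition behind speculative FSM
%%% parallelization (illustrative Python; Zhao and Shen's contribution is
%%% choosing and configuring the speculated start states on the fly,
%%% which is elided --- this version enumerates every start state instead
%%% of predicting a few):
%%%
%%%   STATES = (0, 1)                # toy FSM: parity of 'a' characters
%%%
%%%   def step(state, ch):
%%%       return state ^ 1 if ch == "a" else state
%%%
%%%   def summarize(chunk):          # start-state -> end-state map
%%%       out = {}
%%%       for s in STATES:
%%%           cur = s
%%%           for ch in chunk:
%%%               cur = step(cur, ch)
%%%           out[s] = cur
%%%       return out
%%%
%%%   text = "abcaabacaba" * 4
%%%   chunks = [text[i:i + 8] for i in range(0, len(text), 8)]
%%%   summaries = [summarize(c) for c in chunks]  # parallelizable part
%%%
%%%   state = 0
%%%   for summary in summaries:      # cheap sequential stitch
%%%       state = summary[state]
%%%   print(state)                   # == parity of 'a's in text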
@Article{David:2015:ACS,
author = "Tudor David and Rachid Guerraoui and Vasileios
Trigonakis",
title = "Asynchronized Concurrency: The Secret to Scaling
Concurrent Search Data Structures",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "631--644",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694359",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce ``asynchronized concurrency (ASCY),'' a
paradigm consisting of four complementary programming
patterns. ASCY calls for the design of concurrent
search data structures (CSDSs) to resemble that of
their sequential counterparts. We argue that ASCY leads
to implementations which are portably scalable: they
scale across different types of hardware platforms,
including single and multi-socket ones, for various
classes of workloads, such as read-only and read-write,
and according to different performance metrics,
including throughput, latency, and energy. We
substantiate our thesis through the most exhaustive
evaluation of CSDSs to date, involving 6 platforms, 22
state-of-the-art CSDS algorithms, 10 re-engineered
state-of-the-art CSDS algorithms following the ASCY
patterns, and 2 new CSDS algorithms designed with ASCY
in mind. We observe up to 30\% improvements in
throughput in the re-engineered algorithms, while our
new algorithms outperform the state-of-the-art
alternatives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Bhatotia:2015:ITL,
author = "Pramod Bhatotia and Pedro Fonseca and Umut A. Acar and
Bj{\"o}rn B. Brandenburg and Rodrigo Rodrigues",
title = "{iThreads}: a Threading Library for Parallel
Incremental Computation",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "645--659",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694371",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Incremental computation strives for efficient
successive runs of applications by re-executing only
those parts of the computation that are affected by a
given input change instead of recomputing everything
from scratch. To realize these benefits automatically,
we describe iThreads, a threading library for parallel
incremental computation. iThreads supports unmodified
shared-memory multithreaded programs: it can be used as
a replacement for pthreads by a simple exchange of
dynamically linked libraries, without even recompiling
the application code. To enable such an interface, we
designed algorithms and an implementation to operate at
the compiled binary code level by leveraging
MMU-assisted memory access tracking and process-based
thread isolation. Our evaluation on a multicore
platform using applications from the PARSEC and Phoenix
benchmarks and two case-studies shows significant
performance gains.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Gidra:2015:NGC,
author = "Lokesh Gidra and Ga{\"e}l Thomas and Julien Sopena and
Marc Shapiro and Nhan Nguyen",
title = "{NumaGiC}: a Garbage Collector for Big Data on Big
{NUMA} Machines",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "661--673",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694361",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "On contemporary cache-coherent Non-Uniform Memory
Access (ccNUMA) architectures, applications with a
large memory footprint suffer from the cost of the
garbage collector (GC), because, as the GC scans the
reference graph, it makes many remote memory accesses,
saturating the interconnect between memory nodes. We
address this problem with NumaGiC, a GC with a
mostly-distributed design. In order to maximise memory
access locality during collection, a GC thread avoids
accessing a different memory node, instead notifying a
remote GC thread with a message; nonetheless, NumaGiC
avoids the drawbacks of a pure distributed design,
which tends to decrease parallelism. We compare NumaGiC
with Parallel Scavenge and NAPS on two different ccNUMA
architectures running on the Hotspot Java Virtual
Machine of OpenJDK 7. On Spark and Neo4j, two
industry-strength analytics applications, with heap
sizes ranging from 160 GB to 350 GB, and on SPECjbb2013
and SPECjbb2005, our GC improves overall performance by
up to 45\% over NAPS (up to 94\% over Parallel
Scavenge), and increases the performance of the
collector itself by up to 3.6x over NAPS (up to 5.4x
over Parallel Scavenge).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Nguyen:2015:FCR,
author = "Khanh Nguyen and Kai Wang and Yingyi Bu and Lu Fang
and Jianfei Hu and Guoqing Xu",
title = "{FACADE}: a Compiler and Runtime for (Almost)
Object-Bounded Big Data Applications",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "675--690",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694345",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The past decade has witnessed the increasing demands
on data-driven business intelligence that led to the
proliferation of data-intensive applications. A managed
object-oriented programming language such as Java is
often the developer's choice for implementing such
applications, due to its quick development cycle and
rich community resource. While the use of such
languages makes programming easier, their automated
memory management comes at a cost. When the managed
runtime meets Big Data, this cost is significantly
magnified and becomes a scalability-prohibiting
bottleneck. This paper presents a novel compiler
framework, called Facade, that can generate
highly-efficient data manipulation code by
automatically transforming the data path of an existing
Big Data application. The key treatment is that in the
generated code, the number of runtime heap objects
created for data types in each thread is (almost)
statically bounded, leading to significantly reduced
memory management cost and improved scalability. We
have implemented Facade and used it to transform 7
common applications on 3 real-world, already
well-optimized Big Data frameworks: GraphChi, Hyracks,
and GPS. Our experimental results are very positive:
the generated programs have (1) achieved a 3\%--48\%
execution time reduction and an up to 88x GC reduction;
(2) consumed up to 50\% less memory, and (3) scaled to
much larger datasets.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
@Article{Agrawal:2015:ASD,
author = "Varun Agrawal and Abhiroop Dabral and Tapti Palit and
Yongming Shen and Michael Ferdman",
title = "Architectural Support for Dynamic Linking",
journal = j-SIGPLAN,
volume = "50",
number = "4",
pages = "691--702",
month = apr,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2775054.2694392",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue May 12 17:41:19 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "All software in use today relies on libraries,
including standard libraries (e.g., C, C++) and
application-specific libraries (e.g., libxml, libpng).
Most libraries are loaded in memory and dynamically
linked when programs are launched, resolving symbol
addresses across the applications and libraries.
Dynamic linking has many benefits: It allows code to be
reused between applications, conserves memory (because
only one copy of a library is kept in memory for all
the applications that share it), and allows libraries
to be patched and updated without modifying programs,
among numerous other benefits. However, these benefits
come at the cost of performance. For every call made to
a function in a dynamically linked library, a
trampoline is used to read the function address from a
lookup table and branch to the function, incurring
memory load and branch operations. Static linking
avoids this performance penalty, but loses all the
benefits of dynamic linking. Given its myriad benefits,
dynamic linking is the predominant choice today,
despite the performance cost. In this work, we propose
a speculative hardware mechanism to optimize dynamic
linking by avoiding executing the trampolines for
library function calls, providing the benefits of
dynamic linking with the performance of static linking.
Speculatively skipping the memory load and branch
operations of the library call trampolines improves
performance by reducing the number of executed
instructions and gains additional performance by
reducing pressure on the instruction and data caches,
TLBs, and branch predictors. Because the indirect
targets of library call trampolines do not change
during program execution, our speculative mechanism
never misspeculates in practice. We evaluate our
technique on real hardware with production software and
observe up to 4\% speedup using only 1.5KB of on-chip
storage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '15 conference proceedings.",
}
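%%% A minimal sketch of the trampoline/lookup-table indirection the
%%% Agrawal entry above refers to (conceptual Python, not the dynamic
%%% loader's actual mechanism): dynamically linked calls go through a
%%% lazily filled table, costing a table load plus an indirect branch,
%%% whereas a statically bound call jumps straight to the function.
%%%
%%%   import math
%%%
%%%   got = {}                          # stand-in for the lookup table
%%%
%%%   def call_dynamic(name, *args):
%%%       if name not in got:           # lazy binding, like a first
%%%           got[name] = getattr(math, name)  # PLT call resolving a
%%%       return got[name](*args)       # symbol; then table load + branch
%%%
%%%   print(call_dynamic("sqrt", 2.0))  # "dynamically linked" call
%%%   print(math.sqrt(2.0))             # "statically linked": direct call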
@Article{Baird:2015:OTC,
author = "Ryan Baird and Peter Gavin and Magnus Sj{\"a}lander
and David Whalley and Gang-Ryung Uh",
title = "Optimizing Transfers of Control in the Static Pipeline
Architecture",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "1:1--1:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754952",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Statically pipelined processors offer a new way to
improve the performance beyond that of a traditional
in-order pipeline while simultaneously reducing energy
usage by enabling the compiler to control more
fine-grained details of the program execution. This
paper describes how a compiler can exploit the features
of the static pipeline architecture to apply
optimizations on transfers of control that are not
possible on a conventional architecture. The
optimizations presented in this paper include hoisting
the target address calculations for branches, jumps,
and calls out of loops, performing branch chaining
between calls and jumps, hoisting the setting of return
addresses out of loops, and exploiting conditional
calls and returns. The benefits of performing these
transfer-of-control optimizations include a 6.8\%
reduction in execution time and a 3.6\% decrease in
estimated energy usage.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Liu:2015:CCD,
author = "Qingrui Liu and Changhee Jung and Dongyoon Lee and
Devesh Tiwari",
title = "{Clover}: Compiler Directed Lightweight Soft Error
Resilience",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "2:1--2:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754959",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents Clover, a compiler directed soft
error detection and recovery scheme for lightweight
soft error resilience. The compiler carefully generates
soft error tolerant code based on idempotent processing
without explicit checkpoint. During program execution,
Clover relies on a small number of acoustic wave
detectors deployed in the processor to identify soft
errors by sensing the wave made by a particle strike.
To cope with DUE (detected unrecoverable errors) caused
by the sensing latency of error detection, Clover
leverages a novel selective instruction duplication
technique called tail-DMR (dual modular redundancy).
Once a soft error is detected by either the sensor or
the tail-DMR, Clover takes care of the error as in the
case of exception handling. To recover from the error,
Clover simply redirects program control to the
beginning of the code region where the error is
detected. The experimental results demonstrate that the
average runtime overhead is only 26\%, which is a 75\%
reduction compared to that of the state-of-the-art soft
error resilience technique.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Bardizbanyan:2015:IDA,
author = "Alen Bardizbanyan and Magnus Sj{\"a}lander and David
Whalley and Per Larsson-Edefors",
title = "Improving Data Access Efficiency by Using
Context-Aware Loads and Stores",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "3:1--3:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754960",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Memory operations have a significant impact on both
performance and energy usage even when an access hits
in the level-one data cache (L1 DC). Load instructions
in particular affect performance as they frequently
result in stalls since the register to be loaded is
often referenced before the data is available in the
pipeline. L1 DC accesses also impact energy usage as
they typically require significantly more energy than a
register file access. Despite their impact on
performance and energy usage, L1 DC accesses on most
processors are performed in a general fashion without
regard to the context in which the load or store
operation is performed. We describe a set of techniques
where the compiler enhances load and store instructions
so that they can be executed with fewer stalls and/or
enable the L1 DC to be accessed in a more
energy-efficient manner. We show that using these
techniques can simultaneously achieve a 6\% gain in
performance and a 43\% reduction in L1 DC energy
usage.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Stilkerich:2015:PGA,
author = "Isabella Stilkerich and Clemens Lang and Christoph
Erhardt and Michael Stilkerich",
title = "A Practical Getaway: Applications of Escape Analysis
in Embedded Real-Time Systems",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "4:1--4:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754961",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The use of a managed, type-safe language such as Java
in real-time and embedded systems offers productivity
and, in particular, safety and dependability benefits
at a reasonable cost. It has been shown for commodity
systems that escape analysis (EA) enables a set of
useful optimizations and benefits from the properties
of a type-safe language. In this paper, we explore the
application of escape analysis in KESO [34], a Java
ahead-of-time compiler targeting (deeply) embedded
real-time systems. We present specific applications of
EA for embedded programs that go beyond the widely
known stack-allocation and synchronization
optimizations, such as extended remote procedure call
support for software-isolated applications, automated
inference of immutable data, or improved upper space and
time bounds for worst-case estimations.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Manilov:2015:FRT,
author = "Stanislav Manilov and Bj{\"o}rn Franke and Anthony
Magrath and Cedric Andrieu",
title = "Free Rider: a Tool for Retargeting Platform-Specific
Intrinsic Functions",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "5:1--5:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754962",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Short-vector SIMD and DSP instructions are popular
extensions to common ISAs. These extensions deliver
excellent performance and compact code for some
compute-intensive applications, but they require
specialised compiler support. To enable the programmer
to explicitly request the use of such an instruction,
many C compilers provide platform-specific intrinsic
functions, whose implementation is handled specially by
the compiler. The use of such intrinsics, however,
inevitably results in non-portable code. In this paper
we develop a novel methodology for retargeting such
non-portable code, which maps intrinsics from one
platform to another, taking advantage of similar
intrinsics on the target platform. We employ a
description language to specify the signature and
semantics of intrinsics and perform graph-based pattern
matching and high-level code transformations to derive
optimised implementations exploiting the target's
intrinsics, wherever possible. We demonstrate the
effectiveness of our new methodology, implemented in
the FREE RIDER tool, by automatically retargeting
benchmarks derived from OpenCV samples and a complex
embedded application optimised to run on an ARM
Cortex-M4 to an Intel Edison module with SSE4.2
instructions. We achieve a speedup of up to 3.73 over a
plain C baseline, and on average 96.0\% of the speedup
of manually ported and optimised versions of the
benchmarks.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Dietrich:2015:CKC,
author = "Christian Dietrich and Martin Hoffmann and Daniel
Lohmann",
title = "Cross-Kernel Control-Flow--Graph Analysis for
Event-Driven Real-Time Systems",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "6:1--6:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754963",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Embedded real-time control systems generally have a
dedicated purpose and fixed set of functionalities.
This manifests in a large amount of implicit and
explicit static knowledge, available already at compile
time. Modern compilers can extract and exploit this
information to perform extensive whole-program analyses
and interprocedural optimizations. However, these
analyses typically end at the application--kernel
boundary, so control-flow transitions between
different threads are not yet covered. This
restriction stems from the pessimistic assumption of a
probabilistic scheduling policy of the underlying
operating system, impeding detailed predictions of the
overall system behavior. Real-time operating systems,
however, do provide deterministic and exactly specified
scheduling decisions, as embedded control systems rely
on a timely and precise behavior. In this paper, we
present an approach that incorporates the RTOS
semantics into the control-flow analysis, to cross the
application--kernel boundary. By combining operating
system semantics, the static system configuration and
the application logic, we determine a cross-kernel
control-flow--graph that provides a global view of all
possible execution paths of a real-time system. Having
this knowledge at hand enables us to tailor the
operating system kernel more closely to the particular
application scenario. Using the example of a real-world
safety-critical control system, we present two possible
use cases: run-time optimizations, by means of
specialized system calls for each call site, allow us to
speed up the kernel execution path by 33 percent in our
benchmark scenario. An automated generation of OS state
assertions on the expected system behavior, targeting
transient hardware fault tolerance, yields
significant robustness improvements.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Ghosh:2015:EEA,
author = "Soumyadeep Ghosh and Yongjun Park and Arun Raman",
title = "Enabling Efficient Alias Speculation",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "7:1--7:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754964",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Microprocessors designed using HW/SW codesign
principles, such as Transmeta{\TM} Efficeon{\TM} and
the soon-to-ship NVIDIA 64-bit Tegra{\reg} K1, use
dynamic binary optimization to extract
instruction-level parallelism. Many code optimizations
are made significantly more effective through the use
of alias speculation. The state-of-the-art alias
speculation system, SMARQ, provides 40\% speedup on
average over a system with no alias speculation. This
performance, however, comes at the cost of introducing
new alias registers and increased power consumption due
to new checks for validating speculation. Consequently,
improving the efficiency of alias speculation by
reducing alias register requirements and rationalizing
speculation validation checks is critical for the
viability of SMARQ. This paper presents alias
coalescing, a novel technique to significantly improve
the efficiency of SMARQ through a synergistic
combination of compiler and microarchitectural
techniques. By using a more compact encoding for memory
access ranges for memory instructions, alias coalescing
simultaneously reduces the alias register pressure in
SMARQ by a geomean of 26.09\% and 39.96\%, and the
dynamic alias checks by 20.73\% and 33.87\%, across the
entire SPEC CINT2006 and SPEC CFP2006 suites
respectively.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Zheng:2015:WAD,
author = "Wenguang Zheng and Hui Wu",
title = "{WCET-Aware} Dynamic {D}-cache Locking for a Single
Task",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "8:1--8:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754965",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Caches have been extensively used to bridge the
increasing speed gap between processors and off-chip
memory. However, caches make it much harder to compute
the WCET (Worst-Case Execution Time) of a program.
Cache locking is an effective technique for overcoming
the unpredictability problem of caches. We investigate
the WCET aware D-cache locking problem for a single
task, and propose two dynamic cache locking approaches.
The first approach formulates the problem as a global
ILP (Integer Linear Programming) problem that
simultaneously selects a near-optimal set of variables
as the locked cache contents and allocates them to the
D-cache. The second one iteratively constructs a
subgraph of the CFG of the task where the lengths of
all the paths are close to the longest path length, and
uses an ILP formulation to select a near-optimal set of
variables in the subgraph as the locked cache contents
and allocate them to the D-cache. For both approaches,
we propose a novel, efficient D-cache allocation
algorithm. We have implemented both approaches and
compared them with the longest path-based, dynamic
cache locking approach proposed in [22] and the static
WCET analysis approach without cache locking proposed
in [14] by using a set of benchmarks from the
M{\"a}lardalen WCET benchmark suite, SNU real-time
benchmarks and the benchmarks used in [27]. Compared to
the static WCET analysis approach, the average WCET
improvements of the first approach range between 11.3\%
and 31.6\%, and the average WCET improvements of the
second approach range between 12.3\% and 32.9\%.
Compared to the longest path-based, dynamic cache
locking approach, the average WCET improvements of the
first approach range between 4.7\% and 14.3\%, and the
average WCET improvements of the second approach range
between 5.3\% and 15.0\%.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Lin:2015:STU,
author = "Yixiao Lin and Sayan Mitra",
title = "{StarL}: Towards a Unified Framework for Programming,
Simulating and Verifying Distributed Robotic Systems",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "9:1--9:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754966",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We developed StarL as a framework for programming,
simulating, and verifying distributed systems that
interact with physical processes. The StarL framework has
(a) a collection of distributed primitives for
coordination, such as mutual exclusion, registration
and geocast that can be used to build sophisticated
applications, (b) theory libraries for verifying StarL
applications in the PVS theorem prover, and (c) an
execution environment that can be used to deploy the
applications on hardware or to execute them in a
discrete event simulator. The primitives have (i)
abstract, nondeterministic specifications in terms of
invariants, and assume-guarantee style progress
properties, (ii) implementations in Java/Android that
always satisfy the invariants and attempt progress
using best effort strategies. The PVS theories specify
the invariant and progress properties of the
primitives, and have to be appropriately instantiated
and composed with the application's state machine to
prove properties about the application. We have built
two execution environments: one for deploying
applications on Android/iRobot Create platform and a
second one for simulating large instantiations of the
applications in a discrete event simulator. The
capabilities are illustrated with a StarL application
for vehicle-to-vehicle coordination in an automatic
intersection that uses primitives for point-to-point
motion, mutual exclusion, and registration.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Zhang:2015:IPA,
author = "Zhenkai Zhang and Xenofon Koutsoukos",
title = "Improving the Precision of Abstract Interpretation
Based Cache Persistence Analysis",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "10:1--10:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754967",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When designing hard real-time embedded systems, it is
required to estimate the worst-case execution time
(WCET) of each task for schedulability analysis.
Precise cache persistence analysis can significantly
tighten the WCET estimation, especially when the
program has many loops. Methods for persistence
analysis should safely and precisely classify memory
references as persistent. Existing safe approaches
suffer from multiple sources of pessimism and may not
provide precise results. In this paper, we first
identify some sources of pessimism that two recent
approaches based on younger set and may analysis may
encounter. Then, we propose two methods to eliminate
these sources of pessimism. The first method improves
the update function of the may analysis-based approach;
and the second method integrates the younger set-based
and may analysis-based approaches together to further
reduce pessimism. We also prove the two proposed
methods are still safe. We evaluate the approaches on a
set of benchmarks and observe that the number of memory
references classified as persistent is increased by the
proposed methods. Moreover, we empirically compare the
storage space and analysis time used by different
methods.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Barijough:2015:IAM,
author = "Kamyar Mirzazad Barijough and Matin Hashemi and
Volodymyr Khibin and Soheil Ghiasi",
title = "Implementation-Aware Model Analysis: The Case of
Buffer-Throughput Tradeoff in Streaming Applications",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "11:1--11:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754968",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Models of computation abstract away a number of
implementation details in favor of well-defined
semantics. While this has unquestionable benefits, we
argue that analysis of models solely based on
operational semantics (implementation-oblivious
analysis) is unfit to drive implementation design space
exploration. Specifically, we study the tradeoff
between buffer size and streaming throughput in
applications modeled as synchronous data flow (SDF)
graphs. We demonstrate the inherent inaccuracy of
the implementation-oblivious approach, which considers
only the SDF operational semantics. We propose a rigorous
transformation, which equips the state-of-the-art
buffer-throughput tradeoff analysis technique with
implementation awareness. Extensive empirical
evaluation shows that our approach results in
significantly more accurate estimates of streaming
throughput at the model level, while running two orders
of magnitude faster than cycle-accurate simulation of
implementations.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Liu:2015:SDS,
author = "Chen Liu and Chengmo Yang",
title = "Secure and Durable {(SEDURA)}: an Integrated
Encryption and Wear-leveling Framework for {PCM}-based
Main Memory",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "12:1--12:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754969",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Phase changing memory (PCM) is considered a promising
candidate for next-generation main-memory. Despite its
advantages of lower power and high density, PCM faces
critical security challenges due to its non-volatility:
data are still accessible by the attacker even if the
device is detached from a power supply. While
encryption has been widely adopted as the solution to
protect data, it not only creates additional
performance and energy overhead during data
encryption/decryption, but also hurts PCM lifetime by
introducing more writes to PCM cells. In this paper, we
propose a framework that integrates encryption and
wear-leveling so as to mitigate the adverse impact of
encryption on PCM performance and lifetime. Moreover,
by randomizing the address space during wear-leveling,
an extra level of protection is provided to the data in
memory. We propose two algorithms that respectively
prioritize data security and memory lifetime, allowing
designers to trade off between these two factors based
on their needs. Compared to previous encryption
techniques, the proposed SEDURA framework is able to
deliver both more randomness to protect data and more
balanced PCM writes, thus effectively balancing the
three aspects of data security, application
performance, and device lifetime.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Procter:2015:SDH,
author = "Adam Procter and William L. Harrison and Ian Graves
and Michela Becchi and Gerard Allwein",
title = "Semantics Driven Hardware Design, Implementation, and
Verification with {ReWire}",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "13:1--13:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754970",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There is no such thing as high assurance without high
assurance hardware. High assurance hardware is
essential, because any and all high assurance systems
ultimately depend on hardware that conforms to, and
does not undermine, critical system properties and
invariants. And yet, high assurance hardware
development is stymied by the conceptual gap between
formal methods and hardware description languages used
by engineers. This paper presents ReWire, a functional
programming language providing a suitable foundation
for formal verification of hardware designs, and a
compiler for that language that translates high-level,
semantics-driven designs directly into working
hardware. ReWire's design and implementation are
presented, along with a case study in the design of a
secure multicore processor, demonstrating both ReWire's
expressiveness as a programming language and its power
as a framework for formal, high-level reasoning about
hardware systems.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Woithe:2015:TPA,
author = "Hans Christian Woithe and Ulrich Kremer",
title = "{TrilobiteG}: a programming architecture for
autonomous underwater vehicles",
journal = j-SIGPLAN,
volume = "50",
number = "5",
pages = "14:1--14:??",
month = may,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2808704.2754971",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Jul 31 19:39:44 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming autonomous systems can be challenging
because many programming decisions must be made in real
time and under stressful conditions, such as on a
battlefield, during a short communication window, or
during a storm at sea. As such, new programming designs
are needed to reflect these specific and extreme
challenges. TrilobiteG is a programming architecture
for buoyancy-driven autonomous underwater vehicles
(AUVs), called gliders. Gliders are designed to spend
weeks to months in the ocean, where they operate fully
autonomously while submerged and can only communicate
via satellite during their limited time at the surface.
Based on the experience gained from a seven-year-long
collaboration with two oceanographic institutes, the
TrilobiteG architecture has been developed with the
main goal of enabling users to run more effective
missions. The TrilobiteG programming environment
consists of a domain-specific language called ALGAE, a
lower level service layer, and a set of real-time and
faster-than-real-time simulators. The system has been
used to program novel and robust glider behaviors, as
well as to find software problems that otherwise may
have remained undetected, with potentially catastrophic
results. We believe that TrilobiteG can serve as a
blueprint for other autonomous systems as well, and
that TrilobiteG will motivate and enable a broader
scientific community to work on extreme, real-world
problems by using the simulation infrastructure.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '15 conference proceedings.",
}
@Article{Panchekha:2015:AIA,
author = "Pavel Panchekha and Alex Sanchez-Stern and James R.
Wilcox and Zachary Tatlock",
title = "Automatically improving accuracy for floating point
expressions",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "1--11",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737959",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scientific and engineering applications depend on
floating point arithmetic to approximate real
arithmetic. This approximation introduces rounding
error, which can accumulate to produce unacceptable
results. While the numerical methods literature
provides techniques to mitigate rounding error,
applying these techniques requires manually rearranging
expressions and understanding the finer details of
floating point arithmetic. We introduce Herbie, a tool
which automatically discovers the rewrites experts
perform to improve accuracy. Herbie's heuristic search
estimates and localizes rounding error using sampled
points (rather than static error analysis), applies a
database of rules to generate improvements, takes
series expansions, and combines improvements for
different input regions. We evaluated Herbie on
examples from a classic numerical methods textbook, and
found that Herbie was able to improve accuracy on each
example, some by up to 60 bits, while imposing a median
performance overhead of 40\%. Colleagues in machine
learning have used Herbie to significantly improve the
results of a clustering algorithm, and a mathematical
library has accepted two patches generated using
Herbie.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Zhang:2015:DTE,
author = "Danfeng Zhang and Andrew C. Myers and Dimitrios
Vytiniotis and Simon Peyton-Jones",
title = "Diagnosing type errors with class",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "12--21",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738009",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Type inference engines often give terrible error
messages, and the more sophisticated the type system
the worse the problem. We show that even with the
highly expressive type system implemented by the
Glasgow Haskell Compiler (GHC)--including type classes,
GADTs, and type families--it is possible to identify
the most likely source of the type error, rather than
the first source that the inference engine trips over.
To determine which are the likely error sources, we
apply a simple Bayesian model to a graph representation
of the typing constraints; the satisfiability or
unsatisfiability of paths within the graph provides
evidence for or against possible explanations. While we
build on prior work on error diagnosis for simpler type
systems, inference in the richer type system of Haskell
requires extending the graph with new nodes. The
augmentation of the graph creates challenges both for
Bayesian reasoning and for ensuring termination. Using
a large corpus of Haskell programs, we show that this
error localization technique is practical and
significantly improves accuracy over the state of the
art.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Lopes:2015:PCP,
author = "Nuno P. Lopes and David Menendez and Santosh
Nagarakatte and John Regehr",
title = "Provably correct peephole optimizations with {Alive}",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "22--32",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737965",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compilers should not miscompile. Our work addresses
problems in developing peephole optimizations that
perform local rewriting to improve the efficiency of
LLVM code. These optimizations are individually
difficult to get right, particularly in the presence of
undefined behavior; taken together they represent a
persistent source of bugs. This paper presents Alive, a
domain-specific language for writing optimizations and
for automatically either proving them correct or else
generating counterexamples. Furthermore, Alive can be
automatically translated into C++ code that is suitable
for inclusion in an LLVM optimization pass. Alive is
based on an attempt to balance usability and formal
methods; for example, it captures---but largely
hides---the detailed semantics of three different kinds
of undefined behavior in LLVM. We have translated more
than 300 LLVM optimizations into Alive and, in the
process, found that eight of them were wrong.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Faddegon:2015:ADR,
author = "Maarten Faddegon and Olaf Chitil",
title = "Algorithmic debugging of real-world {Haskell}
programs: deriving dependencies from the cost centre
stack",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "33--42",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737985",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Existing algorithmic debuggers for Haskell require a
transformation of all modules in a program, even
libraries that the user does not want to debug and
which may use language features not supported by the
debugger. This is a pity, because a promising approach
to debugging is therefore not applicable to many
real-world programs. We use the cost centre stack from
the Glasgow Haskell Compiler profiling environment
together with runtime value observations as provided by
the Haskell Object Observation Debugger (HOOD) to
collect enough information for algorithmic debugging.
Program annotations are in suspected modules only. With
this technique algorithmic debugging is applicable to a
much larger set of Haskell programs. This demonstrates
that for functional languages in general a simple stack
trace extension is useful to support tasks such as
profiling and debugging.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Sidiroglou-Douskos:2015:AEE,
author = "Stelios Sidiroglou-Douskos and Eric Lahtinen and Fan
Long and Martin Rinard",
title = "Automatic error elimination by horizontal code
transfer across multiple applications",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "43--54",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737988",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Code Phage (CP), a system for automatically
transferring correct code from donor applications into
recipient applications that process the same inputs to
successfully eliminate errors in the recipient.
Experimental results using seven donor applications to
eliminate ten errors in seven recipient applications
highlight the ability of CP to transfer code across
applications to eliminate out of bounds access, integer
overflow, and divide by zero errors. Because CP works
with binary donors with no need for source code or
symbolic information, it supports a wide range of use
cases. To the best of our knowledge, CP is the first
system to automatically transfer code across multiple
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Liu:2015:LRT,
author = "Peng Liu and Xiangyu Zhang and Omer Tripp and Yunhui
Zheng",
title = "{Light}: replay via tightly bounded recording",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "55--64",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738001",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reproducing concurrency bugs is a prominent challenge.
Existing techniques either rely on recording very fine
grained execution information and hence have high
runtime overhead, or strive to log as little
information as possible but provide no guarantee of
reproducing a bug. We present Light, a technique that
features much lower overhead compared to techniques
based on fine grained recording, and that guarantees to
reproduce concurrent bugs. We leverage and formally
prove that recording flow dependences is the necessary
and sufficient condition to reproduce a concurrent bug.
The flow dependences, together with the thread local
orders that can be automatically inferred (and hence
not logged), are encoded as scheduling constraints. An
SMT solver is used to derive a replay schedule, which
is guaranteed to exist even though it may be different
from the original schedule. Our experiments show that
Light has only 44\% logging overhead, almost one order
of magnitude lower than the state of the art techniques
relying on logging memory accesses. Its space overhead
is only 10\% of those techniques. Light can also
reproduce all the bugs we have collected whereas
existing techniques miss some of them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Lidbury:2015:MCC,
author = "Christopher Lidbury and Andrei Lascu and Nathan Chong
and Alastair F. Donaldson",
title = "Many-core compiler fuzzing",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "65--76",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737986",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We address the compiler correctness problem for
many-core systems through novel applications of fuzz
testing to OpenCL compilers. Focusing on two methods
from prior work, random differential testing and
testing via equivalence modulo inputs (EMI), we present
several strategies for random generation of
deterministic, communicating OpenCL kernels, and an
injection mechanism that allows EMI testing to be
applied to kernels that otherwise exhibit little or no
dynamically-dead code. We use these methods to conduct
a large, controlled testing campaign with respect to 21
OpenCL (device, compiler) configurations, covering a
range of CPU, GPU, accelerator, FPGA and emulator
implementations. Our study provides independent
validation of claims in prior work related to the
effectiveness of random differential testing and EMI
testing, proposes novel methods for lifting these
techniques to the many-core setting and reveals a
significant number of OpenCL compiler bugs in
commercial implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Sergey:2015:MVF,
author = "Ilya Sergey and Aleksandar Nanevski and Anindya
Banerjee",
title = "Mechanized verification of fine-grained concurrent
programs",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "77--87",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737964",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Efficient concurrent programs and data structures
rarely employ coarse-grained synchronization mechanisms
(i.e., locks); instead, they implement custom
synchronization patterns via fine-grained primitives,
such as compare-and-swap. Due to sophisticated
interference scenarios between threads, reasoning about
such programs is challenging and error-prone, and can
benefit from mechanization. In this paper, we present
the first completely formalized framework for
mechanized verification of full functional correctness
of fine-grained concurrent programs. Our tool is based
on the recently proposed program logic FCSL. It is
implemented as an embedded DSL in the dependently-typed
language of the Coq proof assistant, and is powerful
enough to reason about programming features such as
higher-order functions and local thread spawning. By
incorporating a uniform concurrency model, based on
state-transition systems and partial commutative
monoids, FCSL makes it possible to build proofs about
concurrent libraries in a thread-local, compositional
way, thus facilitating scalability and reuse: libraries
are verified just once, and their specifications are
used ubiquitously in client-side reasoning. We
illustrate the proof layout in FCSL by example, outline
its infrastructure, and report on our experience of
using FCSL to verify a number of concurrent algorithms
and data structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Sharma:2015:VPC,
author = "Rahul Sharma and Michael Bauer and Alex Aiken",
title = "Verification of producer-consumer synchronization in
{GPU} programs",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "88--98",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737962",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Previous efforts to formally verify code written for
GPUs have focused solely on kernels written within the
traditional data-parallel GPU programming model. No
previous work has considered the higher performance,
but more complex, warp-specialized kernels based on
producer-consumer named barriers available on current
hardware. In this work we present the first formal
operational semantics for named barriers and define
what it means for a warp-specialized kernel to be
correct. We give algorithms for verifying the
correctness of warp-specialized kernels and prove that
they are both sound and complete for the most common
class of warp-specialized programs. We also present
WEFT, a verification tool for checking warp-specialized
code. Using WEFT, we discover several non-trivial bugs
in production warp-specialized kernels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Gammie:2015:RSV,
author = "Peter Gammie and Antony L. Hosking and Kai
Engelhardt",
title = "Relaxing safely: verified on-the-fly garbage
collection for {x86-TSO}",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "99--109",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738006",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We report on a machine-checked verification of safety
for a state-of-the-art, on-the-fly, concurrent,
mark-sweep garbage collector that is designed for
multi-core architectures with weak memory consistency.
The proof explicitly incorporates the relaxed memory
semantics of x86 multiprocessors. To our knowledge,
this is the first fully machine-checked proof of safety
for such a garbage collector. We couch the proof in a
framework that system implementers will find appealing,
with the fundamental components of the system specified
in a simple and intuitive programming language. The
abstract model is detailed enough for its
correspondence with an assembly language implementation
to be straightforward.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Tassarotti:2015:VRC,
author = "Joseph Tassarotti and Derek Dreyer and Viktor
Vafeiadis",
title = "Verifying read-copy-update in a logic for weak
memory",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "110--120",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737992",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Read-Copy-Update (RCU) is a technique for letting
multiple readers safely access a data structure while a
writer concurrently modifies it. It is used heavily in
the Linux kernel in situations where fast reads are
important and writes are infrequent. Optimized
implementations rely only on the weaker memory
orderings provided by modern hardware, avoiding the
need for expensive synchronization instructions (such
as memory barriers) as much as possible. Using GPS, a
recently developed program logic for the C/C++11 memory
model, we verify an implementation of RCU for a
singly-linked list assuming ``release-acquire''
semantics. Although release-acquire synchronization is
stronger than what is required by real RCU
implementations, it is nonetheless significantly weaker
than the assumption of sequential consistency made in
prior work on RCU verification. Ours is the first
formal proof of correctness for an implementation of
RCU under a weak memory model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Ko:2015:LCT,
author = "Yousun Ko and Bernd Burgstaller and Bernhard Scholz",
title = "{LaminarIR}: compile-time queues for structured
streams",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "121--130",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737994",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Stream programming languages employ FIFO (first-in,
first-out) semantics to model data channels between
producers and consumers. A FIFO data channel stores
tokens in a buffer that is accessed indirectly via
read- and write-pointers. This indirect token-access
decouples a producer's write-operations from the
read-operations of the consumer, thereby making
dataflow implicit. For a compiler, indirect
token-access obscures data-dependencies, which renders
standard optimizations ineffective and impacts stream
program performance negatively. In this paper we
propose a transformation for structured stream
programming languages such as StreamIt that shifts FIFO
buffer management from run-time to compile-time and
eliminates splitters and joiners, whose task is to
distribute and merge streams. To show the effectiveness
of our lowering transformation, we have implemented a
StreamIt to C compilation framework. We have developed
our own intermediate representation (IR) called
LaminarIR, which facilitates the transformation. We
report on the enabling effect of the LaminarIR on
LLVM's optimizations, which required the conversion of
several standard StreamIt benchmarks from static to
randomized input, to prevent computation of partial
results at compile-time. We conducted our experimental
evaluation on the Intel i7-2600K, AMD Opteron 6378,
Intel Xeon Phi 3120A and ARM Cortex-A15 platforms. Our
LaminarIR reduces data-communication on average by
35.9\% and achieves platform-specific speedups between
3.73x and 4.98x over StreamIt. We reduce memory
accesses by more than 60\% and achieve energy savings
of up to 93.6\% on the Intel i7-2600K.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Ding:2015:OCA,
author = "Wei Ding and Xulong Tang and Mahmut Kandemir and
Yuanrui Zhang and Emre Kultursay",
title = "Optimizing off-chip accesses in multicores",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "131--142",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737989",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In a network-on-chip (NoC) based manycore
architecture, an off-chip data access (main memory
access) needs to travel through the on-chip network,
spending considerable amount of time within the chip
(in addition to the memory access latency). In
addition, it contends with on-chip (cache) accesses as
both use the same NoC resources. In this paper,
focusing on data-parallel, multithreaded applications,
we propose a compiler-based off-chip data access
localization strategy, which places data elements in
the memory space such that an off-chip access traverses
a minimum number of links (hops) to reach the memory
controller that handles this access. This brings three
main benefits. First, the network latency of off-chip
accesses gets reduced; second, the network latency of
on-chip accesses gets reduced; and finally, the memory
latency of off-chip accesses improves, due to reduced
queue latencies. We present an experimental evaluation
of our optimization strategy using a set of 13
multithreaded application programs under both private
and shared last-level caches. The results collected
emphasize the importance of optimizing the off-chip
data accesses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Mehta:2015:ICS,
author = "Sanyam Mehta and Pen-Chung Yew",
title = "Improving compiler scalability: optimizing large
programs at small price",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "143--152",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737954",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compiler scalability is a well known problem:
reasoning about the application of useful optimizations
over large program scopes consumes too much time and
memory during compilation. This problem is exacerbated
in polyhedral compilers that use powerful yet costly
integer programming algorithms to compose loop
optimizations. As a result, the benefits that a
polyhedral compiler has to offer to programs such as
real scientific applications, which contain sequences of
loop nests, remain impractical for common users. In
this work, we address this scalability problem in
polyhedral compilers. We identify three causes of
unscalability, each of which stems from the large number of
statements and dependences in the program scope. We
propose a one-shot solution to the problem by reducing
the effective number of statements and dependences as
seen by the compiler. We achieve this by representing a
sequence of statements in a program by a single
super-statement. This set of super-statements exposes
the minimum sufficient constraints to the Integer
Linear Programming (ILP) solver for finding correct
optimizations. We implement our approach in the PLuTo
polyhedral compiler and find that it condenses the
program statements and program dependences by factors
of 4.7x and 6.4x, respectively, averaged over 9 hot
regions (ranging from 48 to 121 statements) in 5 real
applications. As a result, the improvements in time and
memory requirement for compilation are 268x and 20x,
respectively, over the latest version of the PLuTo
compiler. The final compile times are comparable to the
Intel compiler while the performance is 1.92x better on
average due to the latter's conservative approach to
loop optimization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Appel:2015:VCP,
author = "Andrew W. Appel",
title = "Verification of a cryptographic primitive: {SHA-256}
(abstract)",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "153--153",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2774972",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A full formal machine-checked verification of a C
program: the OpenSSL implementation of SHA-256. This is
an interactive proof of functional correctness in the
Coq proof assistant, using the Verifiable C program
logic. Verifiable C is a separation logic for the C
language, proved sound w.r.t. the operational semantics
for C, connected to the CompCert verified optimizing C
compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Deligiannis:2015:APA,
author = "Pantazis Deligiannis and Alastair F. Donaldson and
Jeroen Ketema and Akash Lal and Paul Thomson",
title = "Asynchronous programming, analysis and testing with
state machines",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "154--164",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737996",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming efficient asynchronous systems is
challenging because it can often be hard to express the
design declaratively, or to defend against data races
and interleaving-dependent assertion violations.
Previous work has only addressed these challenges in
isolation, by either designing a new declarative
language, a new data race detection tool or a new
testing technique. We present P\#, a language for
high-reliability asynchronous programming co-designed
with a static data race analysis and systematic
concurrency testing infrastructure. We describe our
experience using P\# to write several distributed
protocols and port an industrial-scale system internal
to Microsoft, showing that the combined techniques, by
leveraging the design of P\#, are effective in finding
bugs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Huang:2015:SMC,
author = "Jeff Huang",
title = "Stateless model checking concurrent programs with
maximal causality reduction",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "165--174",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737975",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present maximal causality reduction (MCR), a new
technique for stateless model checking. MCR
systematically explores the state-space of concurrent
programs with a provably minimal number of executions.
Each execution corresponds to a distinct maximal causal
model extracted from a given execution trace, which
captures the largest possible set of causally
equivalent executions. Moreover, MCR is embarrassingly
parallel by shifting the runtime exploration cost to
offline analysis. We have designed and implemented MCR
using a constraint-based approach and compared with
iterative context bounding (ICB) and dynamic partial
order reduction (DPOR) on both benchmarks and
real-world programs. MCR reduces the number of
executions explored by ICB and ICB+DPOR by orders of
magnitude, and significantly improves the scalability,
efficiency, and effectiveness of the state-of-the-art
for both state-space exploration and bug finding. In
our experiments, MCR has also revealed several new data
races and null pointer dereference errors in frequently
studied real-world programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Samak:2015:SRT,
author = "Malavika Samak and Murali Krishna Ramanathan and
Suresh Jagannathan",
title = "Synthesizing racy tests",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "175--185",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737998",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Subtle concurrency errors in multithreaded libraries
that arise because of incorrect or inadequate
synchronization are often difficult to pinpoint
precisely using only static techniques. On the other
hand, the effectiveness of dynamic race detectors is
critically dependent on multithreaded test suites whose
execution can be used to identify and trigger races.
Usually, such multithreaded tests need to invoke a
specific combination of methods with objects involved
in the invocations being shared appropriately to expose
a race. Without a priori knowledge of the race,
construction of such tests can be challenging. In this
paper, we present a lightweight and scalable technique
for synthesizing precisely these kinds of tests. Given
a multithreaded library and a sequential test suite, we
describe a fully automated analysis that examines
sequential execution traces, and produces as its output
a concurrent client program that drives shared objects
via library method calls to states conducive for
triggering a race. Experimental results on a variety of
well-tested Java libraries yield 101 synthesized
multithreaded tests in less than four minutes.
Analyzing the execution of these tests using an
off-the-shelf race detector reveals 187 harmful races,
including several previously unreported ones. Our
implementation, named NARADA, and the results of our
experiments are available at
http://www.csa.iisc.ernet.in/~sss/tools/narada.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
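A minimal illustrative sketch of the kind of two-threaded client such a
tool emits, assuming a toy Counter library class with a missing lock;
the class and test names here are invented, not NARADA's output.

    import threading

    class Counter:
        # Toy "library" class with inadequate synchronization (invented).
        def __init__(self):
            self.value = 0

        def increment(self):
            tmp = self.value        # unsynchronized read ...
            self.value = tmp + 1    # ... then write: a data race

    def synthesized_test():
        # A concurrent client that drives the shared object to a state
        # conducive to the race, as the synthesized tests aim to do.
        shared = Counter()
        threads = [threading.Thread(target=shared.increment)
                   for _ in range(2)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        # Running this under a dynamic race detector would flag the
        # unsynchronized read/write on `value`.
        return shared.value

    print(synthesized_test())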
@Article{Koskinen:2015:PPM,
author = "Eric Koskinen and Matthew Parkinson",
title = "The {Push\slash Pull} model of transactions",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "186--195",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737995",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a general theory of serializability,
unifying a wide range of transactional algorithms,
including some that are yet to come. To this end, we
provide a compact semantics in which concurrent
transactions PUSH their effects into the shared view
(or UNPUSH to recall effects) and PULL the effects of
potentially uncommitted concurrent transactions into
their local view (or UNPULL to detangle). Each
operation comes with simple criteria given in terms of
commutativity (Lipton's left-movers and right-movers).
The benefit of this model is that most of the elaborate
reasoning (coinduction, simulation, subtle invariants,
etc.) necessary for proving the serializability of a
transactional algorithm is already proved within the
semantic model. Thus, proving serializability (or
opacity) amounts simply to mapping the algorithm on to
our rules, and showing that it satisfies the rules'
criteria.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{McClurg:2015:ESN,
author = "Jedidiah McClurg and Hossein Hojjat and Pavol
Cern{\'y} and Nate Foster",
title = "Efficient synthesis of network updates",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "196--207",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737980",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software-defined networking (SDN) is revolutionizing
the networking industry, but current SDN programming
platforms do not provide automated mechanisms for
updating global configurations on the fly. Implementing
updates by hand is challenging for SDN programmers
because networks are distributed systems with hundreds
or thousands of interacting nodes. Even if initial and
final configurations are correct, naively updating
individual nodes can lead to incorrect transient
behaviors, including loops, black holes, and access
control violations. This paper presents an approach for
automatically synthesizing updates that are guaranteed
to preserve specified properties. We formalize network
updates as a distributed programming problem and
develop a synthesis algorithm based on
counterexample-guided search and incremental model
checking. We describe a prototype implementation, and
present results from experiments on real-world
topologies and properties demonstrating that our tool
scales to updates involving over one-thousand nodes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
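A minimal sketch of counterexample-guided search for a safe update
ordering, assuming a hypothetical three-switch network and a stand-in
loop_free property check; the paper's algorithm is incremental rather
than this brute-force enumeration.

    from itertools import permutations

    SWITCHES = ("s1", "s2", "s3")

    def loop_free(config):
        # Stand-in for the incremental model checker: the transient
        # property fails only if s2 runs the new rules while s1 still
        # runs the old ones (an invented property).
        return not (config["s2"] == "new" and config["s1"] == "old")

    def counterexample(order):
        # Apply per-switch updates, checking every intermediate
        # configuration; return the first bad configuration, if any.
        config = {s: "old" for s in SWITCHES}
        for switch in order:
            config[switch] = "new"
            if not loop_free(config):
                return dict(config)
        return None

    def synthesize_update():
        for order in permutations(SWITCHES):
            if counterexample(order) is None:
                return order
        return None

    print(synthesize_update())  # ('s1', 's2', 's3')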
@Article{Nori:2015:ESP,
author = "Aditya V. Nori and Sherjil Ozair and Sriram K.
Rajamani and Deepak Vijaykeerthy",
title = "Efficient synthesis of probabilistic programs",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "208--217",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737982",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We show how to automatically synthesize probabilistic
programs from real-world datasets. Such a synthesis is
feasible due to a combination of two techniques: (1) We
borrow the idea of ``sketching'' from synthesis of
deterministic programs, and allow the programmer to
write a skeleton program with ``holes''. Sketches
enable the programmer to communicate domain-specific
intuition about the structure of the desired program
and prune the search space, and (2) we design an
efficient Markov Chain Monte Carlo (MCMC) based
synthesis algorithm to instantiate the holes in the
sketch with program fragments. Our algorithm
efficiently synthesizes a probabilistic program that is
most consistent with the data. A core difficulty in
synthesizing probabilistic programs is computing the
likelihood L(P | D) of a candidate program P generating
data D. We propose an approximate method to compute
likelihoods using mixtures of Gaussian distributions,
thereby avoiding expensive computation of integrals.
The use of such approximations enables us to speed up
evaluation of the likelihood of candidate programs by a
factor of 1000, and makes Markov Chain Monte Carlo
based search feasible. We have implemented our
algorithm in a tool called PSKETCH, and our results are
encouraging: PSKETCH is able to automatically synthesize
16 non-trivial real-world probabilistic programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
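A sketch of the likelihood approximation at the heart of the search,
assuming a one-hole candidate program; a single fitted Gaussian stands
in for the paper's Gaussian mixtures, and the program, data, and sample
counts are invented.

    import math
    import random
    import statistics

    def candidate_program(hole):
        # A sketch with one hole: output drawn from Normal(hole, 1).
        return random.gauss(hole, 1.0)

    def approx_log_likelihood(hole, data, n_samples=2000):
        # Approximate log L(P | D): sample the candidate's output
        # distribution, fit a Gaussian, and score the data under it,
        # avoiding expensive integration entirely.
        samples = [candidate_program(hole) for _ in range(n_samples)]
        mu = statistics.fmean(samples)
        sigma = statistics.stdev(samples)
        return sum(-0.5 * ((x - mu) / sigma) ** 2
                   - math.log(sigma * math.sqrt(2 * math.pi))
                   for x in data)

    data = [2.1, 1.9, 2.4, 2.0]
    # An MCMC loop would propose hole values; here we compare just two.
    for hole in (0.0, 2.0):
        print(hole, approx_log_likelihood(hole, data))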
@Article{Barowy:2015:FER,
author = "Daniel W. Barowy and Sumit Gulwani and Ted Hart and
Benjamin Zorn",
title = "{FlashRelate}: extracting relational data from
semi-structured spreadsheets using examples",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "218--228",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737952",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "With hundreds of millions of users, spreadsheets are
one of the most important end-user applications.
Spreadsheets are easy to use and allow users great
flexibility in storing data. This flexibility comes at
a price: users often treat spreadsheets as a poor man's
database, leading to creative solutions for storing
high-dimensional data. The trouble arises when users
need to answer queries with their data. Data
manipulation tools make strong assumptions about data
layouts and cannot read these ad-hoc databases.
Converting data into the appropriate layout requires
programming skills or a major investment in manual
reformatting. The effect is that a vast amount of
real-world data is ``locked-in'' to a proliferation of
one-off formats. We introduce FlashRelate, a synthesis
engine that lets ordinary users extract structured
relational data from spreadsheets without programming.
Instead, users extract data by supplying examples of
output relational tuples. FlashRelate uses these
examples to synthesize a program in Flare. Flare is a
novel extraction language that extends regular
expressions with geometric constructs. An interactive
user interface on top of FlashRelate lets end users
extract data by point-and-click. We demonstrate that
correct Flare programs can be synthesized in seconds
from a small set of examples for 43 real-world
scenarios. Finally, our case study demonstrates
FlashRelate's usefulness addressing the widespread
problem of data trapped in corporate and government
formats.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
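An illustrative sketch of extraction-by-example over cell geometry,
assuming a toy grid and a crude year-like cell predicate; Flare itself
combines regular expressions with such geometric constructs, which this
offset inference only gestures at.

    # Toy spreadsheet: a year in one column, its value one cell right.
    GRID = [
        ["2013", "7.0", ""],
        ["2014", "7.5", ""],
        ["2015", "8.1", ""],
    ]

    def generalize(example):
        # From one example tuple, infer the geometric offset between
        # its cells -- a stand-in for synthesizing a Flare program.
        (r1, c1), (r2, c2) = example
        return (r2 - r1, c2 - c1)

    def extract(grid, offset):
        dr, dc = offset
        out = []
        for r, row in enumerate(grid):
            for c, cell in enumerate(row):
                if len(cell) == 4 and cell.isdigit():  # year-like anchor
                    r2, c2 = r + dr, c + dc
                    if 0 <= r2 < len(grid) and 0 <= c2 < len(grid[r2]):
                        out.append((cell, grid[r2][c2]))
        return out

    # The user supplies one output tuple by pointing at two cells.
    offset = generalize(((0, 0), (0, 1)))
    print(extract(GRID, offset))  # [('2013', '7.0'), ('2014', '7.5'), ...]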
@Article{Feser:2015:SDS,
author = "John K. Feser and Swarat Chaudhuri and Isil Dillig",
title = "Synthesizing data structure transformations from
input-output examples",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "229--239",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737977",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a method for example-guided synthesis of
functional programs over recursive data structures.
Given a set of input-output examples, our method
synthesizes a program in a functional language with
higher-order combinators like map and fold. The
synthesized program is guaranteed to be the simplest
program in the language to fit the examples. Our
approach combines three technical ideas: inductive
generalization, deduction, and enumerative search.
First, we generalize the input-output examples into
hypotheses about the structure of the target program.
For each hypothesis, we use deduction to infer new
input/output examples for the missing subexpressions.
This leads to a new subproblem where the goal is to
synthesize expressions within each hypothesis. Since
not every hypothesis can be realized into a program
that fits the examples, we use a combination of
best-first enumeration and deduction to search for a
hypothesis that meets our needs. We have implemented
our method in a tool called \lambda^2, and we evaluate
this tool on a large set of synthesis problems
involving lists, trees, and nested data structures. The
experiments demonstrate the scalability and broad scope
of \lambda^2. A highlight is the synthesis of a program
believed to be the world's earliest functional pearl.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
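A toy enumerative synthesizer over map and fold, in the spirit of the
entry above; the component set and examples are invented, and the
paper's inductive generalization and deduction steps are omitted, so
this is smallest-first enumeration only.

    from functools import reduce

    COMPONENTS = {
        "inc": lambda x: x + 1,
        "double": lambda x: x * 2,
        "square": lambda x: x * x,
    }

    def synthesize(examples):
        # Try every `map f`, then every `fold (+) 0 . map f`, returning
        # the first (hence simplest) program fitting all examples.
        for name, f in COMPONENTS.items():
            if all([f(x) for x in i] == o for i, o in examples):
                return f"map {name}"
        for name, f in COMPONENTS.items():
            if all(reduce(lambda a, x: a + f(x), i, 0) == o
                   for i, o in examples):
                return f"fold (+) 0 (map {name})"
        return None

    print(synthesize([([1, 2, 3], [1, 4, 9]), ([2], [4])]))  # map square
    print(synthesize([([1, 2, 3], 14)]))  # fold (+) 0 (map square)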
@Article{Ziv:2015:CCC,
author = "Ofri Ziv and Alex Aiken and Guy Golan-Gueta and G.
Ramalingam and Mooly Sagiv",
title = "Composing concurrency control",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "240--249",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737970",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrency control poses significant challenges when
composing computations over multiple data-structures
(objects) with different concurrency-control
implementations. We formalize the usually desired
requirements (serializability, abort-safety,
deadlock-safety, and opacity) as well as stronger
versions of these properties that enable composition.
We show how to compose protocols satisfying these
properties so that the resulting combined protocol also
satisfies these properties. Our approach generalizes
well-known protocols (such as two-phase-locking and
two-phase-commit) and leads to new protocols. We apply
this theory to show how we can safely compose
optimistic and pessimistic concurrency control. For
example, we show how we can execute a transaction that
accesses two objects, one controlled by an STM and
another by locking.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Zhang:2015:DPO,
author = "Naling Zhang and Markus Kusano and Chao Wang",
title = "Dynamic partial order reduction for relaxed memory
models",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "250--259",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737956",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Under a relaxed memory model such as TSO or PSO, a
concurrent program running on a shared-memory
multiprocessor may observe two types of nondeterminism:
the nondeterminism in thread scheduling and the
nondeterminism in store buffering. Although there is a
large body of work on mitigating the scheduling
nondeterminism during runtime verification, methods for
soundly mitigating the store buffering nondeterminism
are lacking. We propose a new dynamic partial order
reduction (POR) algorithm for verifying concurrent
programs under TSO and PSO. Our method relies on
modeling both types of nondeterminism in a unified
framework, which allows us to extend existing POR
techniques to TSO and PSO without overhauling the
verification algorithm. In addition to sound POR, we
also propose a buffer-bounding method for more
aggressively reducing the state space. We have
implemented our new methods in a stateless model
checking tool and demonstrated their effectiveness on a
set of multithreaded C benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
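The buffering nondeterminism shows up already on the classic
store-buffer litmus test. This sketch folds scheduling and buffering
into a single visibility choice per write (whether it reached shared
memory before the other thread's read), a deliberate simplification of
the unified model described above.

    import itertools

    def tso_outcomes():
        # Litmus test:  thread 0: x = 1; r0 = y
        #               thread 1: y = 1; r1 = x
        # Each write either is globally visible by the time the other
        # thread reads (True) or still sits in a store buffer (False).
        outcomes = set()
        for x_visible, y_visible in itertools.product(
                (True, False), repeat=2):
            r0 = 1 if y_visible else 0   # thread 0 reads y
            r1 = 1 if x_visible else 0   # thread 1 reads x
            outcomes.add((r0, r1))
        return outcomes

    # (0, 0) appears: the outcome that sequential consistency forbids
    # but TSO's store buffering permits.
    print(sorted(tso_outcomes()))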
@Article{Emmi:2015:MRS,
author = "Michael Emmi and Constantin Enea and Jad Hamza",
title = "Monitoring refinement via symbolic reasoning",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "260--269",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737983",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Efficient implementations of concurrent objects such
as semaphores, locks, and atomic collections are
essential to modern computing. Programming such objects
is error prone: in minimizing the synchronization
overhead between concurrent object invocations, one
risks the conformance to reference implementations ---
or in formal terms, one risks violating observational
refinement. Precisely testing this refinement even
within a single execution is intractable, limiting
existing approaches to executions with very few object
invocations. We develop scalable and effective
algorithms for detecting refinement violations. Our
algorithms are founded on incremental, symbolic
reasoning, and exploit foundational insights into the
refinement-checking problem. Our approach is sound, in
that we detect only actual violations, and scales far
beyond existing violation-detection algorithms.
Empirically, we find that our approach is practically
complete, in that we detect the violations arising in
actual executions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Longfield:2015:PGS,
author = "Stephen Longfield and Brittany Nkounkou and Rajit
Manohar and Ross Tate",
title = "Preventing glitches and short circuits in high-level
self-timed chip specifications",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "270--279",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737967",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Self-timed chip designs are commonly specified in a
high-level message-passing language called CHP. This
language is closely related to Hoare's CSP except it
admits erroneous behavior due to the necessary
limitations of efficient hardware implementations. For
example, two processes sending on the same channel at
the same time causes glitches and short circuits in the
physical chip implementation. If a CHP program
maintains certain invariants, such as only one process
is sending on any given channel at a time, it can
guarantee an error-free execution that behaves much
like a CSP program would. In this paper, we present an
inferable effect system for ensuring that these
invariants hold, drawing from model-checking
methodologies while exploiting language-usage patterns
and domain-specific specializations to achieve
efficiency. This analysis is sound, and is even
complete for the common subset of CHP programs without
data-sensitive synchronization. We have implemented the
analysis and demonstrated that it scales to validate
even microprocessors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Lal:2015:DID,
author = "Akash Lal and Shaz Qadeer",
title = "{DAG} inlining: a decision procedure for
reachability-modulo-theories in hierarchical programs",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "280--290",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737987",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A hierarchical program is one with multiple procedures
but no loops or recursion. This paper studies the
problem of deciding reachability queries in
hierarchical programs where individual statements can
be encoded in a decidable logic (say in SMT). This
problem is fundamental to verification and most
directly applicable to doing bounded reachability in
programs, i.e., reachability under a bound on the
number of loop iterations and recursive calls. The
usual method of deciding reachability in hierarchical
programs is to first inline all procedures and then do
reachability on the resulting single-procedure program.
Such inlining unfolds the call graph of the program to
a tree and may lead to an exponential increase in the
size of the program. We design and evaluate a method
called DAG inlining that unfolds the call graph to a
directed acyclic graph (DAG) instead of a tree by
sharing the bodies of procedures at certain points
during inlining. DAG inlining can produce much more
compact representations than tree inlining.
Empirically, we show that it leads to significant
improvements in the running time of a state-of-the-art
verifier.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
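A size comparison showing why DAG inlining helps, on an invented
diamond-chain hierarchical call graph; the control logic that makes a
shared body sound (guarding which call site is active) is elided.

    # Each procedure calls the next one twice; no loops or recursion.
    CALLS = {"p0": ["p1", "p1"], "p1": ["p2", "p2"],
             "p2": ["p3", "p3"], "p3": []}

    def tree_size(proc):
        # Ordinary inlining: every call site gets a private copy of the
        # callee, so the unfolding is a tree, exponential in depth.
        return 1 + sum(tree_size(c) for c in CALLS[proc])

    def dag_size(proc, shared=None):
        # DAG inlining: one shared body per procedure.
        if shared is None:
            shared = set()
        if proc in shared:
            return 0
        shared.add(proc)
        return 1 + sum(dag_size(c, shared) for c in CALLS[proc])

    print(tree_size("p0"))  # 15 = 2**4 - 1 inlined bodies
    print(dag_size("p0"))   # 4: one body per procedure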
@Article{Johnson:2015:EES,
author = "Andrew Johnson and Lucas Waye and Scott Moore and
Stephen Chong",
title = "Exploring and enforcing security guarantees via
program dependence graphs",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "291--302",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737957",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present PIDGIN, a program analysis and
understanding tool that enables the specification and
enforcement of precise application-specific information
security guarantees. PIDGIN also allows developers to
interactively explore the information flows in their
applications to develop policies and investigate
counter-examples. PIDGIN combines program dependence
graphs (PDGs), which precisely capture the information
flows in a whole application, with a custom PDG query
language. Queries express properties about the paths in
the PDG; because paths in the PDG correspond to
information flows in the application, queries can be
used to specify global security policies. PIDGIN is
scalable. Generating a PDG for a 330k line Java
application takes 90 seconds, and checking a policy on
that PDG takes under 14 seconds. The query language is
expressive, supporting a large class of precise,
application-specific security guarantees. Policies are
separate from the code and do not interfere with
testing or development, and can be used for security
regression testing. We describe the design and
implementation of PIDGIN and report on using it: (1) to
explore information security guarantees in legacy
programs; (2) to develop and modify security policies
concurrently with application development; and (3) to
develop policies based on known vulnerabilities.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
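A minimal sketch of a PDG path query of the kind described above, over
an invented six-node dependence graph; PIDGIN's actual query language
is much richer than plain reachability.

    # Nodes are program points; edges are information flows (invented).
    PDG = {
        "password": ["check"],
        "userInput": ["result"],
        "check": ["log", "result"],
        "result": ["output"],
        "log": [],
        "output": [],
    }

    def flows(pdg, src, dst, seen=None):
        # A policy like "no path from password to output" is a query
        # over PDG paths; a path found here is a counterexample.
        if seen is None:
            seen = set()
        if src == dst:
            return True
        seen.add(src)
        return any(flows(pdg, n, dst, seen)
                   for n in pdg[src] if n not in seen)

    print(flows(PDG, "password", "output"))   # True: policy violated
    print(flows(PDG, "log", "output"))        # False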
@Article{Singh:2015:MNP,
author = "Gagandeep Singh and Markus P{\"u}schel and Martin
Vechev",
title = "Making numerical program analysis fast",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "303--313",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738000",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Numerical abstract domains are a fundamental component
in modern static program analysis and are used in a
wide range of scenarios (e.g., computing array bounds,
disjointness, etc.). However, analysis with these
domains can be very expensive, deeply affecting the
scalability and practical applicability of the static
analysis. Hence, it is critical to ensure that these
domains are made highly efficient. In this work, we
present a complete approach for optimizing the
performance of the Octagon numerical abstract domain, a
domain shown to be particularly effective in practice.
Our optimization approach is based on two key insights:
(i) the ability to perform online decomposition of the
octagons leading to a massive reduction in operation
counts, and (ii) leveraging classic performance
optimizations from linear algebra such as
vectorization, locality of reference, scalar
replacement and others, for improving the key
bottlenecks of the domain. Applying these ideas, we
designed new algorithms for the core Octagon operators
with better asymptotic runtime than prior work and
combined them with the optimization techniques to
achieve high actual performance. We implemented our
approach in the Octagon operators exported by the
popular APRON C library, thus enabling existing static
analyzers using APRON to immediately benefit from our
work. To demonstrate the performance benefits of our
approach, we evaluated our framework on three published
static analyzers showing massive speed-ups for the time
spent in Octagon analysis (e.g., up to 146x) as well as
significant end-to-end program analysis speed-ups (up
to 18.7x). Based on these results, we believe that our
framework can serve as a new basis for static analysis
with the Octagon numerical domain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
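The cubic closure that dominates such domains can be sketched on plain
difference-bound matrices (octagons use a signed-variable variant of
the same kernel); this Floyd--Warshall-style loop nest is the kind of
bottleneck the decomposition and vectorization above accelerate.

    INF = float("inf")

    def close(m):
        # Tighten all bounds: m[i][j] bounds v_j - v_i <= m[i][j].
        n = len(m)
        for k in range(n):
            for i in range(n):
                for j in range(n):
                    if m[i][k] + m[k][j] < m[i][j]:
                        m[i][j] = m[i][k] + m[k][j]
        return m

    # From v1 - v0 <= 2 and v2 - v1 <= 3, closure infers v2 - v0 <= 5.
    m = [[0, 2, INF],
         [INF, 0, 3],
         [INF, INF, 0]]
    print(close(m)[0][2])  # 5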
@Article{Weijiang:2015:TDA,
author = "Yusheng Weijiang and Shruthi Balakrishna and Jianqiao
Liu and Milind Kulkarni",
title = "Tree dependence analysis",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "314--325",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737972",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We develop a new framework for analyzing recursive
methods that perform traversals over trees, called tree
dependence analysis. This analysis translates
dependence analysis techniques for regular programs to
the irregular space, identifying the structure of
dependences within a recursive method that traverses
trees. We develop a dependence test that exploits the
dependence structure of such programs, and can prove
that several locality- and parallelism- enhancing
transformations are legal. In addition, we extend our
analysis with a novel path-dependent, conditional
analysis to refine the dependence test and prove the
legality of transformations for a wider range of
algorithms. We then use these analyses to show that
several common algorithms that manipulate trees
recursively are amenable to several locality- and
parallelism-enhancing transformations. This work shows
that classical dependence analysis techniques, which
have largely been confined to nested loops over array
data structures, can be extended and translated to work
for complex, recursive programs that operate over
pointer-based data structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Kang:2015:FCM,
author = "Jeehoon Kang and Chung-Kil Hur and William Mansky and
Dmitri Garbuzov and Steve Zdancewic and Viktor
Vafeiadis",
title = "A formal {C} memory model supporting integer-pointer
casts",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "326--335",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738005",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The ISO C standard does not specify the semantics of
many valid programs that use non-portable idioms such
as integer-pointer casts. Recent efforts at formal
definitions and verified implementation of the C
language inherit this feature. By adopting high-level
abstract memory models, they validate common
optimizations. On the other hand, this prevents
reasoning about much low-level code relying on the
behavior of common implementations, where formal
verification has many applications. We present the
first formal memory model that allows many common
optimizations and fully supports operations on the
representation of pointers. All arithmetic operations
are well-defined for pointers that have been cast to
integers. Crucially, our model is also simple to
understand and program with. All our results are fully
formalized in Coq.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
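One way to read the model's key idea: a pointer stays an abstract
(block, offset) pair, and its block acquires a concrete integer address
only when some pointer into it is cast to an integer. A Python
simulation of that reading; the block size and base addresses are
invented, and the real model's details differ.

    import itertools

    class Memory:
        BLOCK_SIZE = 0x100  # invented constant

        def __init__(self):
            self._fresh = itertools.count(0x1000, self.BLOCK_SIZE)
            self._base = {}  # abstract block -> concrete base address

        def alloc(self):
            return (object(), 0)  # fresh abstract block at offset 0

        def ptr_to_int(self, ptr):
            block, off = ptr
            if block not in self._base:        # lazy realization
                self._base[block] = next(self._fresh)
            return self._base[block] + off     # arithmetic now defined

        def int_to_ptr(self, n):
            for block, base in self._base.items():
                if base <= n < base + self.BLOCK_SIZE:
                    return (block, n - base)
            raise ValueError("integer maps to no realized block")

    mem = Memory()
    p = mem.alloc()
    n = mem.ptr_to_int(p)        # the block becomes concrete here
    q = mem.int_to_ptr(n + 8)    # round trip through the integer world
    print(q[1])                  # 8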
@Article{Hathhorn:2015:DUC,
author = "Chris Hathhorn and Chucky Ellison and Grigore Rosu",
title = "Defining the undefinedness of {C}",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "336--345",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737979",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a ``negative'' semantics of the C11
language---a semantics that does not just give meaning
to correct programs, but also rejects undefined
programs. We investigate undefined behavior in C and
discuss the techniques and special considerations
needed for formally specifying it. We have used these
techniques to modify and extend a semantics of C into
one that captures undefined behavior. The amount of
semantic infrastructure and effort required to achieve
this was unexpectedly high, in the end nearly doubling
the size of the original semantics. From our semantics,
we have automatically extracted an undefinedness
checker, which we evaluate against other popular
analysis tools, using our own test suite in addition to
a third-party test suite. Our checker is capable of
detecting examples of all 77 categories of core
language undefinedness appearing in the C11 standard,
more than any other tool we considered. Based on this
evaluation, we argue that our work is the most
comprehensive and complete semantic treatment of
undefined behavior in C, and thus of the C language
itself.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Park:2015:KCF,
author = "Daejun Park and Andrei Stefanescu and Grigore Rosu",
title = "{KJS}: a complete formal semantics of {JavaScript}",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "346--356",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737991",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents KJS, the most complete and
thoroughly tested formal semantics of JavaScript to
date. Being executable, KJS has been tested against the
ECMAScript 5.1 conformance test suite, and passes all
2,782 core language tests. Among the existing
implementations of JavaScript, only Chrome V8 passes
all the tests, and no other semantics passes more than
90\%. In addition to a reference implementation for
JavaScript, KJS also yields a simple coverage metric
for a test suite: the set of semantic rules it
exercises. Our semantics revealed that the ECMAScript
5.1 conformance test suite fails to cover several
semantic rules. Guided by the semantics, we wrote tests
to exercise those rules. The new tests revealed bugs
both in production JavaScript engines (Chrome V8,
Safari WebKit, Firefox SpiderMonkey) and in other
semantics. KJS is symbolically executable, thus it can
be used for formal analysis and verification of
JavaScript programs. We verified non-trivial programs
and found a known security vulnerability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Wilcox:2015:VFI,
author = "James R. Wilcox and Doug Woos and Pavel Panchekha and
Zachary Tatlock and Xi Wang and Michael D. Ernst and
Thomas Anderson",
title = "{Verdi}: a framework for implementing and formally
verifying distributed systems",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "357--368",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737958",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Distributed systems are difficult to implement
correctly because they must handle both concurrency and
failures: machines may crash at arbitrary points and
networks may reorder, drop, or duplicate packets.
Further, their behavior is often too complex to permit
exhaustive testing. Bugs in these systems have led to
the loss of critical data and unacceptable service
outages. We present Verdi, a framework for implementing
and formally verifying distributed systems in Coq.
Verdi formalizes various network semantics with
different faults, and the developer chooses the most
appropriate fault model when verifying their
implementation. Furthermore, Verdi eases the
verification burden by enabling the developer to first
verify their system under an idealized fault model,
then transfer the resulting correctness guarantees to a
more realistic fault model without any additional proof
burden. To demonstrate Verdi's utility, we present the
first mechanically checked proof of linearizability of
the Raft state machine replication algorithm, as well
as verified implementations of a primary-backup
replication system and a key-value store. These
verified systems provide similar performance to
unverified equivalents.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
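A toy analogue of the parameterized network semantics: the same node
handler runs under an idealized fault model and a harsher one. The
handler, message encoding, and fault probabilities are all invented.

    import random

    def run(handler, state, msgs, faults, steps=200, seed=1):
        rng = random.Random(seed)
        msgs = list(msgs)
        for _ in range(steps):
            if not msgs:
                break
            i = rng.randrange(len(msgs)) if faults["reorder"] else 0
            msg = msgs.pop(i)
            if faults["drop"] and rng.random() < 0.2:
                continue                    # message lost
            if faults["duplicate"] and rng.random() < 0.2:
                msgs.append(msg)            # extra copy stays in flight
            state, out = handler(state, msg)
            msgs.extend(out)
        return state

    def node(state, msg):
        # A counter node: each "inc" bumps the state (invented handler).
        return (state + 1, []) if msg == "inc" else (state, [])

    ideal = {"reorder": False, "drop": False, "duplicate": False}
    harsh = {"reorder": True, "drop": True, "duplicate": True}
    print(run(node, 0, ["inc"] * 5, ideal))  # 5
    print(run(node, 0, ["inc"] * 5, harsh))  # may differ: drops/dups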
@Article{Olivo:2015:SDA,
author = "Oswaldo Olivo and Isil Dillig and Calvin Lin",
title = "Static detection of asymptotic performance bugs in
collection traversals",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "369--378",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737966",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper identifies and formalizes a prevalent class
of asymptotic performance bugs called redundant
traversal bugs and presents a novel static analysis for
automatically detecting them. We evaluate our technique
by implementing it in a tool called CLARITY and
applying it to widely-used software packages such as
the Google Core Collections Library, the Apache Common
Collections, and the Apache Ant build tool. Across 1.6M
lines of Java code, CLARITY finds 92 instances of
redundant traversal bugs, including 72 that have never
been previously reported, with just 5 false positives.
To evaluate the performance impact of these bugs, we
manually repair these programs and find that for an
input size of 50,000, all repaired programs are at
least 2.45 faster than their original code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
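A representative redundant-traversal bug and its repair, of the kind
the analysis detects (the example is invented; in the evaluated Java
code the culprit is typically a collection scan nested inside a loop
over the same collection).

    from collections import Counter

    def uniques_slow(items):
        # `count` walks the whole list once per element, so the
        # comprehension is quadratic despite linear-looking code.
        return [x for x in items if items.count(x) == 1]

    def uniques_fast(items):
        # Repair: hoist a single traversal into a frequency map.
        freq = Counter(items)
        return [x for x in items if freq[x] == 1]

    data = list(range(1000)) + [0, 1]
    assert uniques_slow(data) == uniques_fast(data)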
@Article{Ding:2015:AAC,
author = "Yufei Ding and Jason Ansel and Kalyan Veeramachaneni
and Xipeng Shen and Una-May O'Reilly and Saman
Amarasinghe",
title = "Autotuning algorithmic choice for input sensitivity",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "379--390",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737969",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A daunting challenge faced by program performance
autotuning is input sensitivity, where the best
autotuned configuration may vary with different input
sets. This paper presents a novel two-level input
learning algorithm to tackle the challenge for an
important class of autotuning problems, algorithmic
autotuning. The new approach uses a two-level input
clustering method to automatically refine input
grouping, feature selection, and classifier
construction. Its design solves a series of open issues
that are particularly essential to algorithmic
autotuning, including the enormous optimization space,
complex influence by deep input features, high cost in
feature extraction, and variable accuracy of
algorithmic choices. Experimental results show that the
new solution yields up to a 3x speedup over using a
single configuration for all inputs, and a 34x speedup
over a traditional one-level method for addressing
input sensitivity in program optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Mendis:2015:HLH,
author = "Charith Mendis and Jeffrey Bosboom and Kevin Wu and
Shoaib Kamil and Jonathan Ragan-Kelley and Sylvain
Paris and Qin Zhao and Saman Amarasinghe",
title = "{Helium}: lifting high-performance stencil kernels from
stripped x86 binaries to {Halide} {DSL} code",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "391--402",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737974",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Highly optimized programs are prone to bit rot, where
performance quickly becomes suboptimal in the face of
new hardware and compiler techniques. In this paper we
show how to automatically lift performance-critical
stencil kernels from a stripped x86 binary and generate
the corresponding code in the high-level
domain-specific language Halide. Using Halide's
state-of-the-art optimizations targeting current
hardware, we show that new optimized versions of these
kernels can replace the originals to rejuvenate the
application for newer hardware. The original optimized
code for kernels in stripped binaries is nearly
impossible to analyze statically. Instead, we rely on
dynamic traces to regenerate the kernels. We perform
buffer structure reconstruction to identify input,
intermediate and output buffer shapes. We abstract from
a forest of concrete dependency trees which contain
absolute memory addresses to symbolic trees suitable
for high-level code generation. This is done by
canonicalizing trees, clustering them based on
structure, inferring higher-dimensional buffer accesses
and finally by solving a set of linear equations based
on buffer accesses to lift them up to simple,
high-level expressions. Helium can handle highly
optimized, complex stencil kernels with input-dependent
conditionals. We lift seven kernels from Adobe
Photoshop giving a 75\% performance improvement, four
kernels from IrfanView, leading to a 4.97$ \times $
performance improvement, and one stencil from the miniGMG multigrid
benchmark netting a 4.25$ \times $ improvement in
performance. We manually rejuvenated Photoshop by
replacing eleven of Photoshop's filters with our lifted
implementations, giving 1.12$ \times $ speedup without
affecting the user experience.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Bowman:2015:PGM,
author = "William J. Bowman and Swaha Miller and Vincent
St-Amour and R. Kent Dybvig",
title = "Profile-guided meta-programming",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "403--412",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737990",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Contemporary compiler systems such as GCC, .NET, and
LLVM incorporate profile-guided optimizations (PGOs) on
low-level intermediate code and basic blocks, with
impressive results over purely static heuristics.
Recent work shows that profile information is also
useful for performing source-to-source optimizations
via meta-programming. For example, using profiling
information to inform decisions about data structures
and algorithms can potentially lead to asymptotic
improvements in performance. We present a design for
profile-guided meta-programming in a general-purpose
meta-programming system. Our design is parametric over
the particular profiler and meta-programming system. We
implement this design in two different meta-programming
systems---the syntactic extensions systems of Chez
Scheme and Racket---and provide several profile-guided
meta-programs as usability case studies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
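A run-time Python analogue of the expansion-time decision such
meta-programs make: choose a data structure from profiled operation
counts. The profile numbers and the threshold are invented; in the
paper the choice happens during macro expansion, not at run time.

    PROFILE = {"member_tests": 10_000, "appends": 12}  # from a prior run

    def choose_container(profile):
        # Membership-heavy workloads get a set; append-heavy ones keep
        # the ordered list.
        if profile["member_tests"] > 10 * profile["appends"]:
            return set      # O(1) membership tests
        return list         # cheap ordered appends

    print(choose_container(PROFILE).__name__)  # set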
@Article{Sivaramakrishnan:2015:DPE,
author = "KC Sivaramakrishnan and Gowtham Kaki and Suresh
Jagannathan",
title = "Declarative programming over eventually consistent
data stores",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "413--424",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737981",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "User-facing online services utilize geo-distributed
data stores to minimize latency and tolerate partial
failures, with the intention of providing a fast,
always-on experience. However, geo-distribution does
not come for free; application developers have to
contend with weak consistency behaviors, and the lack
of abstractions to composably construct high-level
replicated data types, necessitating the need for
complex application logic and invariably exposing
inconsistencies to the user. Some commercial
distributed data stores and several academic proposals
provide a lattice of consistency levels, with stronger
consistency guarantees incurring increased latency and
throughput costs. However, correctly assigning the
right consistency level for an operation requires
subtle reasoning and is often an error-prone task. In
this paper, we present QUELEA, a declarative
programming model for eventually consistent data stores
(ECDS), equipped with a contract language, capable of
specifying fine-grained application-level
consistency properties. A contract enforcement system
analyses contracts, and automatically generates the
appropriate consistency protocol for the method
protected by the contract. We describe an
implementation of QUELEA on top of an off-the-shelf
ECDS that provides support for coordination-free
transactions. Several benchmarks including two large
web applications, illustrate the effectiveness of our
approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Siek:2015:BCT,
author = "Jeremy Siek and Peter Thiemann and Philip Wadler",
title = "Blame and coercion: together again for the first
time",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "425--435",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737968",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "C\#, Dart, Pyret, Racket, TypeScript, VB: many recent
languages integrate dynamic and static types via
gradual typing. We systematically develop three calculi
for gradual typing and the relations between them,
building on and strengthening previous work. The
calculi are: \lambda B, based on the blame calculus of
Wadler and Findler (2009); \lambda C, inspired by the
coercion calculus of Henglein (1994); \lambda S
inspired by the space-efficient calculus of Herman,
Tomb, and Flanagan (2006) and the threesome calculus of
Siek and Wadler (2010). While \lambda B is little
changed from previous work, \lambda C and \lambda S are
new. Together, \lambda B, \lambda C, and \lambda S
provide a coherent foundation for design,
implementation, and optimisation of gradual types. We
define translations from \lambda B to \lambda C and
from \lambda C to \lambda S. Much previous work lacked
proofs of correctness or had weak correctness criteria;
here we demonstrate the strongest correctness criterion
one could hope for, that each of the translations is
fully abstract. Each of the calculi reinforces the
design of the others: \lambda C has a particularly
simple definition, and the subtle definition of blame
safety for \lambda B is justified by the simple
definition of blame safety for \lambda C. Our calculus
\lambda S is implementation-ready: the first
space-efficient calculus that is both straightforward
to implement and easy to understand. We give two
applications: first, using full abstraction from
\lambda C to \lambda S to validate the challenging part
of full abstraction between \lambda B and \lambda C;
and, second, using full abstraction from \lambda B to
\lambda S to easily establish the Fundamental Property
of Casts, which required a custom bisimulation and six
lemmas in earlier work.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Zhang:2015:LFO,
author = "Yizhou Zhang and Matthew C. Loring and Guido
Salvaneschi and Barbara Liskov and Andrew C. Myers",
title = "Lightweight, flexible object-oriented generics",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "436--445",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738008",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The support for generic programming in modern
object-oriented programming languages is awkward and
lacks desirable expressive power. We introduce an
expressive genericity mechanism that adds expressive
power and strengthens static checking, while remaining
lightweight and simple in common use cases. Like type
classes and concepts, the mechanism allows existing
types to model type constraints retroactively. For
expressive power, we expose models as named constructs
that can be defined and selected explicitly to witness
constraints; in common uses of genericity, however,
types implicitly witness constraints without additional
programmer effort. Models are integrated into the
object-oriented style, with features like model
generics, model-dependent types, model enrichment,
model multimethods, constraint entailment, model
inheritance, and existential quantification further
extending expressive power in an object-oriented
setting. We introduce the new genericity features and
show that common generic programming idioms, including
current generic libraries, can be expressed more
precisely and concisely. The static semantics of the
mechanism and a proof of a key decidability property
can be found in an associated technical report.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Nguyen:2015:RCC,
author = "Ph{\'u}c C. Nguy{\~{\^{e}}}n and David {Van Horn}",
title = "Relatively complete counterexamples for higher-order
programs",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "446--456",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737971",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we study the problem of generating
inputs to a higher-order program causing it to error.
We first approach the problem in the setting of PCF, a
typed, core functional language and contribute the
first relatively complete method for constructing
counterexamples for PCF programs. The method is
relatively complete with respect to a first-order
solver over the base types of PCF. In practice, this
means an SMT solver can be used for the effective,
automated generation of higher-order counterexamples
for a large class of programs. We achieve this result
by employing a novel form of symbolic execution for
higher-order programs. The remarkable aspect of this
symbolic execution is that even though symbolic
higher-order inputs and values are considered, the path
condition remains a first-order formula. Our handling
of symbolic function application enables the
reconstruction of higher-order counterexamples from
this first-order formula. After establishing our main
theoretical results, we sketch how to apply the
approach to untyped, higher-order, stateful languages
with first-class contracts and show how counterexample
generation can be used to detect contract violations in
this setting. To validate our approach, we implement a
tool generating counterexamples for erroneous modules
written in Racket.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Chu:2015:AIP,
author = "Duc-Hiep Chu and Joxan Jaffar and Minh-Thai Trinh",
title = "Automatic induction proofs of data-structures in
imperative programs",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "457--466",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737984",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We consider the problem of automated reasoning about
dynamically manipulated data structures. Essential
properties are encoded as predicates whose definitions
are formalized via user-defined recursive rules.
Traditionally, proving relationships between such
properties is limited to the unfold-and-match (U+M)
paradigm which employs systematic transformation steps
of folding/unfolding the rules. A proof, using U+M,
succeeds when we find a sequence of transformations
that produces a final formula which is obviously
provable by simply matching terms. Our contribution
here is the addition of the fundamental principle of
induction to this automated process. We first show that
some proof obligations that are dynamically generated
in the process can be used as induction hypotheses in
the future, and then we show how to use these
hypotheses in an induction step which generates a new
proof obligation aside from those obtained by using the
fold/unfold operations. While the adding of induction
is an obvious need in general, no automated method has
managed to include this in a systematic and general
way. The main reason for this is the problem of
avoiding circular reasoning. We overcome this with a
novel checking condition. In summary, our contribution
is a proof method which --- beyond U+M --- performs
automatic formula re-writing by treating previously
encountered obligations in each proof path as possible
induction hypotheses. In the practical evaluation part
of this paper, we show how the commonly used technique
of using unproven lemmas can be avoided, using
realistic benchmarks. This not only removes the current
burden of coming up with the appropriate lemmas, but
also significantly boosts up the verification process,
since lemma applications, coupled with unfolding, often
induce a large search space. In the end, our method can
automatically reason about a new class of formulas
arising from practical program verification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Carbonneaux:2015:CCR,
author = "Quentin Carbonneaux and Jan Hoffmann and Zhong Shao",
title = "Compositional certified resource bounds",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "467--478",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737955",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a new approach for automatically
deriving worst-case resource bounds for C programs. The
described technique combines ideas from amortized
analysis and abstract interpretation in a unified
framework to address four challenges for
state-of-the-art techniques: compositionality, user
interaction, generation of proof certificates, and
scalability. Compositionality is achieved by
incorporating the potential method of amortized
analysis. It enables the derivation of global
whole-program bounds with local derivation rules by
naturally tracking size changes of variables in
sequenced loops and function calls. The resource
consumption of functions is described abstractly and a
function call can be analyzed without access to the
function body. User interaction is supported with a new
mechanism that clearly separates qualitative and
quantitative verification. A user can guide the
analysis to derive complex non-linear bounds by using
auxiliary variables and assertions. The assertions are
separately proved using established qualitative
techniques such as abstract interpretation or Hoare
logic. Proof certificates are automatically generated
from the local derivation rules. A soundness proof of
the derivation system with respect to a formal cost
semantics guarantees the validity of the certificates.
Scalability is attained by an efficient reduction of
bound inference to a linear optimization problem that
can be solved by off-the-shelf LP solvers. The analysis
framework is implemented in the publicly-available tool
C4B. An experimental evaluation demonstrates the
advantages of the new technique with a comparison of
C4B with existing tools on challenging micro benchmarks
and the analysis of more than 2900 lines of C code from
the cBench benchmark suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
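A minimal worked instance of the potential method the analysis builds
on (the example is illustrative, not from the paper). For the loop
`while (x > 0) { x--; tick(1); }`, choose the linear potential
$\Phi(x) = x$. Each iteration satisfies the local transfer rule

  $$\Phi(x) \;\ge\; 1 + \Phi(x - 1) \qquad (x > 0),$$

so the initial potential pays for every unit of cost and the derived
whole-program bound is $\mathrm{cost} \le \Phi(x_0) = x_0$; the LP
solver's job is to find such coefficients automatically.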
@Article{Crary:2015:PPA,
author = "Karl Crary and Michael J. Sullivan",
title = "Peer-to-peer affine commitment using bitcoin",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "479--488",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737997",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The power of linear and affine logic lies in their
ability to model state change. However, in a trustless,
peer-to-peer setting, it is difficult to force
principals to commit to state changes. We show how to
solve the peer-to-peer affine commitment problem using
a generalization of Bitcoin in which transactions deal
in types rather than numbers. This has applications to
proof-carrying authorization and mechanically
executable contracts. Importantly, our system can
be---and is---implemented on top of the existing
Bitcoin network, so there is no need to recruit
computing power to a new protocol.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Le:2015:TNT,
author = "Ton Chanh Le and Shengchao Qin and Wei-Ngan Chin",
title = "Termination and non-termination specification
inference",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "489--498",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737993",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Techniques for proving termination and non-termination
of imperative programs are usually considered as
orthogonal mechanisms. In this paper, we propose a
novel mechanism that analyzes and proves both program
termination and non-termination at the same time. We
first introduce the concept of second-order termination
constraints and accumulate a set of relational
assumptions on them via a Hoare-style verification. We
then solve these assumptions with case analysis to
determine the (conditional) termination and
non-termination scenarios expressed in some
specification logic form. In contrast to current
approaches, our technique can construct a summary of
terminating and non-terminating behaviors for each
method. This enables modularity and reuse for our
termination and non-termination proving processes. We
have tested our tool on sample programs from a recent
                 termination competition, and it compares favorably
                 against state-of-the-art termination analyzers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Emani:2015:CDM,
author = "Murali Krishna Emani and Michael O'Boyle",
title = "Celebrating diversity: a mixture of experts approach
for runtime mapping in dynamic environments",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "499--508",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737999",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Matching program parallelism to platform parallelism
using thread selection is difficult when the
environment and available resources dynamically change.
Existing compiler or runtime approaches are typically
                 based on a one-size-fits-all policy. There is little
ability to either evaluate or adapt the policy when
encountering new external workloads or hardware
resources. This paper focuses on selecting the best
number of threads for a parallel application in dynamic
environments. It develops a new scheme based on a
                 mixture-of-experts approach. It learns online which of
                 a number of existing policies, or experts, is best
                 suited for a particular environment without having to
                 try out each policy. It does this by using a novel
                 environment predictor as a proxy for the quality of an
                 expert thread-selection policy. Additional expert
                 policies can easily be added and are selected only when
                 appropriate. We evaluate our scheme in environments
                 with varying external workloads and hardware
                 resources. We then consider the case when workloads use
                 affinity scheduling or are themselves adaptive, and
                 show that our approach, in all cases, outperforms
                 existing schemes and surprisingly improves workload
                 performance. On average, we improve performance by
                 1.66x over the OpenMP default, 1.34x over an online
                 scheme, 1.25x over an offline policy, and 1.2x over a
                 state-of-the-art analytic model. Determining
the right number and type of experts is an open problem
and our initial analysis shows that adding more experts
improves accuracy and performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Ren:2015:EER,
author = "Bin Ren and Youngjoon Jo and Sriram Krishnamoorthy and
Kunal Agrawal and Milind Kulkarni",
title = "Efficient execution of recursive programs on commodity
vector hardware",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "509--520",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738004",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The pursuit of computational efficiency has led to the
proliferation of throughput-oriented hardware, from
GPUs to increasingly wide vector units on commodity
processors and accelerators. This hardware is designed
to efficiently execute data-parallel computations in a
vectorized manner. However, many algorithms are more
naturally expressed as divide-and-conquer, recursive,
task-parallel computations. In the absence of data
parallelism, it seems that such algorithms are not well
suited to throughput-oriented architectures. This paper
presents a set of novel code transformations that
expose the data parallelism latent in recursive,
task-parallel programs. These transformations
facilitate straightforward vectorization of
task-parallel programs on commodity hardware. We also
present scheduling policies that maintain high
utilization of vector resources while limiting space
usage. Across several task-parallel benchmarks, we
demonstrate both efficient vector resource utilization
and substantial speedup on chips using Intel's SSE4.2
vector units, as well as accelerators using Intel's
AVX512 units.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Venkat:2015:LDT,
author = "Anand Venkat and Mary Hall and Michelle Strout",
title = "Loop and data transformations for sparse matrix code",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "521--532",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738003",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces three new compiler
transformations for representing and transforming
sparse matrix computations and their data
representations. In cooperation with run-time
inspection, our compiler derives transformed matrix
representations and associated transformed code to
implement a variety of representations targeting
different architecture platforms. This systematic
approach to combining code and data transformations on
sparse computations, which extends a polyhedral
transformation and code generation framework, permits
the compiler to compose these transformations with
                 other transformations to generate code whose
                 performance is on average within 5\% of, and often
                 exceeds, that of the manually-tuned, high-performance
                 sparse matrix libraries CUSP and OSKI. Additionally,
                 the compiler-generated inspector codes are on average
                 1.5$ \times $ faster than OSKI and perform comparably
                 to CUSP.
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
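%%% For context, a minimal C version (generic textbook code, not the
%%% paper's) of the compressed-sparse-row multiply that such
%%% inspector/executor transformations start from; a run-time
%%% inspector would analyze rowptr/colidx and emit an executor
%%% specialized to another layout (e.g., BCSR or ELL).
%%%
%%%   /* y = A*x with A in CSR form: rowptr[i]..rowptr[i+1]-1
%%%      indexes the nonzeros of row i. */
%%%   void spmv_csr(int n, const int *rowptr, const int *colidx,
%%%                 const double *val, const double *x, double *y) {
%%%       for (int i = 0; i < n; i++) {
%%%           double sum = 0.0;
%%%           for (int k = rowptr[i]; k < rowptr[i + 1]; k++)
%%%               sum += val[k] * x[colidx[k]];
%%%           y[i] = sum;
%%%       }
%%%   }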
@Article{Prountzos:2015:SPG,
author = "Dimitrios Prountzos and Roman Manevich and Keshav
Pingali",
title = "Synthesizing parallel graph programs via automated
planning",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "533--544",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737953",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a system that uses automated planning to
synthesize correct and efficient parallel graph
programs from high-level algorithmic specifications.
Automated planning allows us to use constraints to
declaratively encode program transformations such as
scheduling, implementation selection, and insertion of
synchronization. Each plan emitted by the planner
satisfies all constraints simultaneously, and
corresponds to a composition of these transformations.
In this way, we obtain an integrated compilation
approach for a very challenging problem domain. We have
used this system to synthesize parallel programs for
four graph problems: triangle counting, maximal
independent set computation, preflow-push maxflow, and
connected components. Experiments on a variety of
inputs show that the synthesized implementations
perform competitively with hand-written, highly-tuned
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Marr:2015:ZOM,
author = "Stefan Marr and Chris Seaton and St{\'e}phane
Ducasse",
title = "Zero-overhead metaprogramming: reflection and
metaobject protocols fast and without compromises",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "545--554",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737963",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Runtime metaprogramming enables many useful
applications and is often a convenient solution to
solve problems in a generic way, which makes it widely
used in frameworks, middleware, and domain-specific
languages. However, powerful metaobject protocols are
rarely supported and even common concepts such as
reflective method invocation or dynamic proxies are not
                 optimized. Solutions proposed in the literature either
restrict the metaprogramming capabilities or require
application or library developers to apply performance
improving techniques. For overhead-free runtime
metaprogramming, we demonstrate that dispatch chains, a
generalized form of polymorphic inline caches common to
self-optimizing interpreters, are a simple optimization
at the language-implementation level. Our evaluation
with self-optimizing interpreters shows that
unrestricted metaobject protocols can be realized for
the first time without runtime overhead, and that this
optimization is applicable for just-in-time compilation
of interpreters based on meta-tracing as well as
partial evaluation. In this context, we also
demonstrate that optimizing common reflective
operations can lead to significant performance
improvements for existing applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
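%%% A minimal sketch in C (ours; the paper itself works inside
%%% self-optimizing interpreters) of the dispatch-chain idea: a
%%% linked list of guarded cache entries grown at each reflective
%%% call site, so a guard hit skips the full reflective lookup.
%%%
%%%   #include <stddef.h>
%%%   typedef struct Entry {
%%%       const void *guard;             /* e.g., receiver class */
%%%       void (*target)(void *recv);    /* cached method */
%%%       struct Entry *next;
%%%   } Entry;
%%%
%%%   void dispatch(Entry *chain, const void *klass, void *recv,
%%%                 void (*slow_path)(const void *, void *)) {
%%%       for (Entry *e = chain; e != NULL; e = e->next)
%%%           if (e->guard == klass) { e->target(recv); return; }
%%%       slow_path(klass, recv);  /* full lookup; extend the chain */
%%%   }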
@Article{Isradisaikul:2015:FCP,
author = "Chinawat Isradisaikul and Andrew C. Myers",
title = "Finding counterexamples from parsing conflicts",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "555--564",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737961",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Writing a parser remains remarkably painful. Automatic
parser generators offer a powerful and systematic way
to parse complex grammars, but debugging conflicts in
grammars can be time-consuming even for experienced
language designers. Better tools for diagnosing parsing
conflicts will alleviate this difficulty. This paper
proposes a practical algorithm that generates compact,
helpful counterexamples for LALR grammars. For each
parsing conflict in a grammar, a counterexample
demonstrating the conflict is constructed. When the
grammar in question is ambiguous, the algorithm usually
generates a compact counterexample illustrating the
ambiguity. This algorithm has been implemented as an
extension to the CUP parser generator. The results from
applying this implementation to a diverse collection of
faulty grammars show that the algorithm is practical,
effective, and suitable for inclusion in other LALR
parser generators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Leung:2015:IPS,
author = "Alan Leung and John Sarracino and Sorin Lerner",
title = "Interactive parser synthesis by example",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "565--574",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738002",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite decades of research on parsing, the
construction of parsers remains a painstaking, manual
process prone to subtle bugs and pitfalls. We present a
programming-by-example framework called Parsify that is
able to synthesize a parser from input/output examples.
The user does not write a single line of code. To
achieve this, Parsify provides: (a) an iterative
algorithm for synthesizing and refining a grammar one
example at a time, (b) an interface that provides
immediate visual feedback in response to changes in the
grammar being refined, and (c) a graphical mechanism
for specifying example parse trees using only textual
selections. We empirically demonstrate the viability of
our approach by using Parsify to construct parsers for
source code drawn from Verilog, SQL, Apache, and
Tiger.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Lucia:2015:SSP,
author = "Brandon Lucia and Benjamin Ransford",
title = "A simpler, safer programming and execution model for
intermittent systems",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "575--585",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737978",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy harvesting enables novel devices and
applications without batteries, but intermittent
operation under energy harvesting poses new challenges
to memory consistency that threaten to leave
applications in failed states not reachable in
continuous execution. This paper presents analytical
models that aid in reasoning about intermittence. Using
these, we develop DINO (Death Is Not an Option), a
programming and execution model that simplifies
programming for intermittent systems and ensures
volatile and nonvolatile data consistency despite
near-constant interruptions. DINO is the first system
to address these consistency problems in the context of
intermittent execution. We evaluate DINO on three
energy-harvesting hardware platforms running different
                 applications. The applications fail and exhibit errors
                 without DINO, but run correctly with DINO's modest
                 1.8--2.7$ \times $ run-time overhead. DINO also
dramatically simplifies programming, reducing the set
of possible failure-related control transfers by 5--9$
\times $.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
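%%% A loose sketch (ours, not DINO's actual mechanism) of the idea
%%% behind checkpoint-based intermittent execution: volatile state is
%%% copied to nonvolatile memory at task boundaries, so a power
%%% failure rolls execution back to a consistent point; nv_checkpoint
%%% is a hypothetical nonvolatile region.
%%%
%%%   #include <string.h>
%%%   struct state { int sensor_sum; int samples; };
%%%   static struct state volatile_s;     /* lost on power failure */
%%%   static struct state nv_checkpoint;  /* assumed nonvolatile */
%%%
%%%   void task_boundary(void) {  /* commit before the next task */
%%%       memcpy(&nv_checkpoint, &volatile_s, sizeof volatile_s);
%%%   }
%%%   void on_reboot(void) {      /* resume from the last commit */
%%%       memcpy(&volatile_s, &nv_checkpoint, sizeof volatile_s);
%%%   }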
@Article{Machado:2015:CDD,
author = "Nuno Machado and Brandon Lucia and Lu{\'\i}s
Rodrigues",
title = "Concurrency debugging with differential schedule
projections",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "586--595",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737973",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Symbiosis: a concurrency debugging
technique based on novel differential schedule
projections (DSPs). A DSP shows the small set of memory
operations and data-flows responsible for a failure, as
well as a reordering of those elements that avoids the
failure. To build a DSP, Symbiosis first generates a
full, failing, multithreaded schedule via thread path
profiling and symbolic constraint solving. Symbiosis
selectively reorders events in the failing schedule to
produce a non-failing, alternate schedule. A DSP
reports the ordering and data-flow differences between
the failing and non-failing schedules. Our evaluation
on buggy real-world software and benchmarks shows that,
in practical time, Symbiosis generates DSPs that both
                 isolate the small fraction of event orders and
                 data-flows responsible for the failure, and show which
                 event reorderings prevent the failure. In our
                 experiments, DSPs contain 81\% fewer events and 96\%
                 fewer data-flows than the full failure-inducing
                 schedules. Moreover, by
allowing developers to focus on only a few events, DSPs
reduce the amount of time required to find a valid
fix.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Srinivasan:2015:SMC,
author = "Venkatesh Srinivasan and Thomas Reps",
title = "Synthesis of machine code from semantics",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "596--607",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737960",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we present a technique to synthesize
machine-code instructions from a semantic
specification, given as a Quantifier-Free Bit-Vector
(QFBV) logic formula. Our technique uses an
instantiation of the Counter-Example Guided Inductive
Synthesis (CEGIS) framework, in combination with
search-space pruning heuristics to synthesize
instruction-sequences. To counter the exponential cost
inherent in enumerative synthesis, our technique uses a
divide-and-conquer strategy to break the input QFBV
formula into independent sub-formulas, and synthesize
instructions for the sub-formulas. Synthesizers created
by our technique could be used to create
semantics-based binary rewriting tools such as
optimizers, partial evaluators, program
obfuscators/de-obfuscators, etc. Our experiments for
Intel's IA-32 instruction set show that, in comparison
to our baseline algorithm, our search-space pruning
heuristics reduce the synthesis time by a factor of
473, and our divide-and-conquer strategy reduces the
synthesis time by a further 3 to 5 orders of
magnitude.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Gonnord:2015:SRF,
author = "Laure Gonnord and David Monniaux and Gabriel Radanne",
title = "Synthesis of ranking functions using extremal
counterexamples",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "608--618",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737976",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a complete method for synthesizing
lexicographic linear ranking functions (and thus
proving termination), supported by inductive
invariants, in the case where the transition relation
of the program includes disjunctions and existentials
(large block encoding of control flow). Previous work
would either synthesize a ranking function at every
basic block head, not just loop headers, which reduces
the scope of programs that may be proved to be
terminating, or expand large block transitions
including tests into (exponentially many) elementary
transitions, prior to computing the ranking function,
resulting in a very large global constraint system. In
contrast, our algorithm incrementally refines a global
linear constraint system according to extremal
counterexamples: only constraints that exclude spurious
solutions are included. Experiments with our tool
Termite show marked performance and scalability
improvements compared to other systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
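%%% A textbook loop (not from the paper) of the kind a lexicographic
%%% linear ranking function handles: the pair (i, j) decreases
%%% lexicographically at every iteration and is bounded below, which
%%% proves termination.
%%%
%%%   void nested(int i, int j0) {
%%%       int j = j0;
%%%       while (i > 0) {
%%%           if (j > 0) { j--; }           /* (i, j) -> (i, j-1) */
%%%           else       { i--; j = j0; }   /* (i, j) -> (i-1, j0) */
%%%       }
%%%   }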
@Article{Osera:2015:TED,
author = "Peter-Michael Osera and Steve Zdancewic",
title = "Type-and-example-directed program synthesis",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "619--630",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2738007",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents an algorithm for synthesizing
recursive functions that process algebraic datatypes.
It is founded on proof-theoretic techniques that
exploit both type information and input-output examples
to prune the search space. The algorithm uses
refinement trees, a data structure that succinctly
represents constraints on the shape of generated code.
We evaluate the algorithm by using a prototype
implementation to synthesize more than 40 benchmarks
and several non-trivial larger examples. Our results
demonstrate that the approach meets or outperforms the
state-of-the-art for this domain, in terms of synthesis
time or attainable size of the generated programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '15 conference proceedings.",
}
@Article{Tu:2015:CIE,
author = "Cheng-Chun Tu and Michael Ferdman and Chao-tung Lee
and Tzi-cker Chiueh",
title = "A Comprehensive Implementation and Evaluation of
Direct Interrupt Delivery",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "1--15",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731189",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As the performance overhead associated with CPU and
memory virtualization becomes largely negligible,
research efforts are directed toward reducing the I/O
virtualization overhead, which mainly comes from two
sources: DMA set-up and payload copy, and interrupt
delivery. The advent of SRIOV and MRIOV effectively
reduces the DMA-related virtualization overhead to a
minimum. Therefore, the last battleground for
minimizing virtualization overhead is how to directly
deliver every interrupt to its target VM without
involving the hypervisor. This paper describes the
design, implementation, and evaluation of a KVM-based
direct interrupt delivery system called DID. DID
delivers interrupts from SRIOV devices, virtual
devices, and timers to their target VMs directly,
completely avoiding VM exits. Moreover, DID does not
require any modifications to the VM's operating system
and preserves the correct priority among interrupts in
all cases. We demonstrate that DID reduces the number
of VM exits by a factor of 100 for I/O-intensive
workloads, decreases the interrupt invocation latency
by 80\%, and improves the throughput of a VM running
Memcached by a factor of 3.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Pfefferle:2015:HVF,
author = "Jonas Pfefferle and Patrick Stuedi and Animesh Trivedi
and Bernard Metzler and Ionnis Koltsidas and Thomas R.
Gross",
title = "A Hybrid {I/O} Virtualization Framework for
{RDMA}-capable Network Interfaces",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "17--30",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731200",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "DMA-capable interconnects, providing ultra-low latency
and high bandwidth, are increasingly being used in the
context of distributed storage and data processing
systems. However, the deployment of such systems in
virtualized data centers is currently inhibited by the
lack of a flexible and high-performance virtualization
solution for RDMA network interfaces. In this work, we
present a hybrid virtualization architecture which
builds upon the concept of separation of paths for
control and data operations available in RDMA. With
hybrid virtualization, RDMA control operations are
virtualized using hypervisor involvement, while data
operations are set up to bypass the hypervisor
completely. We describe HyV (Hybrid Virtualization), a
virtualization framework for RDMA devices implementing
such a hybrid architecture. In the paper, we provide a
detailed evaluation of HyV for different RDMA
technologies and operations. We further demonstrate the
advantages of HyV in the context of a real distributed
system by running RAMCloud on a set of HyV-enabled
virtual machines deployed across a 6-node RDMA cluster.
All of the performance results we obtained illustrate
that hybrid virtualization enables bare-metal RDMA
performance inside virtual machines while retaining the
flexibility typically associated with
paravirtualization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Younge:2015:SHP,
author = "Andrew J. Younge and John Paul Walters and Stephen P.
Crago and Geoffrey C. Fox",
title = "Supporting High Performance Molecular Dynamics in
Virtualized Clusters using {IOMMU}, {SR-IOV}, and
{GPUDirect}",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "31--38",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731194",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Cloud Infrastructure-as-a-Service paradigms have
recently shown their utility for a vast array of
computational problems, ranging from advanced web
service architectures to high throughput computing.
However, many scientific computing applications have
been slow to adapt to virtualized cloud frameworks.
This is due to performance impacts of virtualization
technologies, coupled with the lack of advanced
hardware support necessary for running many high
performance scientific applications at scale. By using
KVM virtual machines that leverage both Nvidia GPUs and
InfiniBand, we show that molecular dynamics simulations
with LAMMPS and HOOMD run at near-native speeds. This
experiment also illustrates how virtualized
environments can support the latest parallel computing
paradigms, including both MPI+CUDA and new GPUDirect
RDMA functionality. Specific findings show initial
promise in scaling of such applications to larger
production deployments targeting large scale
computational workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Guo:2015:PBL,
author = "Fei Guo and Seongbeom Kim and Yury Baskakov and Ishan
Banerjee",
title = "Proactively Breaking Large Pages to Improve Memory
Overcommitment Performance in {VMware ESXi}",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "39--51",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731187",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "VMware ESXi leverages hardware support for MMU
virtualization available in modern Intel/AMD CPUs. To
optimize address translation performance when running
on such CPUs, ESXi preferably uses host large pages
(2MB in x86-64 systems) to back VM's guest memory.
                 While using host large pages provides the best
                 performance when the host has sufficient free memory,
                 it increases host memory pressure and effectively
                 defeats page sharing. Hence, the host is more likely to
                 hit the point where ESXi has to reclaim VM memory
                 through much more expensive techniques such as
                 ballooning or host swapping. As a result, using host
                 large pages may significantly hurt the consolidation
                 ratio. To deal with this problem, we propose a new host
                 large page management policy that allows ESXi to: (a)
                 identify 'cold' large pages and break them even when
                 the host has plenty of free memory; (b) break all large
                 pages proactively when host free memory becomes scarce,
                 but before the host starts ballooning or swapping; (c)
                 reclaim the small pages within the broken large pages
                 through page sharing. With the new policy, the
                 shareable small pages
can be shared much earlier and the amount of memory
that needs to be ballooned or swapped can be largely
reduced when host memory pressure is high. We also
propose an algorithm to dynamically adjust the page
sharing rate when proactively breaking large pages
using a VM large page shareability estimator for higher
efficiency. Experimental results show that the proposed
large page management policy can improve the
performance of various workloads up to 2.1x by
significantly reducing the amount of ballooned or
swapped memory when host memory pressure is high.
Applications still fully benefit from host large pages
when memory pressure is low.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Wang:2015:HPI,
author = "Zhe Wang and Jianjun Li and Chenggang Wu and Dongyan
Yang and Zhenjiang Wang and Wei-Chung Hsu and Bin Li
and Yong Guan",
title = "{HSPT}: Practical Implementation and Efficient
Management of Embedded Shadow Page Tables for
Cross-{ISA} System Virtual Machines",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "53--64",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731188",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract = "Cross-ISA (Instruction Set Architecture) system-level
                 virtual machines have significant research and
                 practical value. For example, several recently
                 announced virtual smart phones for iOS, which run smart
                 phone applications on x86-based PCs, are deployed on
                 cross-ISA system-level virtual machines. Also, for
                 mobile device application development, emulating the
                 Android/ARM environment on the more powerful x86-64
                 platform makes application development and debugging
                 more convenient and productive. However, the
                 virtualization layer often incurs high performance
                 overhead. The key overhead comes from memory
                 virtualization, where a guest virtual address (GVA)
                 must go through multi-level address translation to
                 become a host physical address (HPA). The Embedded
                 Shadow Page Table (ESPT) approach has been proposed to
                 effectively decrease this address translation cost.
                 ESPT directly maps GVA to HPA, thus avoiding the
                 lengthy guest-virtual to guest-physical, guest-physical
                 to host-virtual, and host-virtual to host-physical
                 address translations. However, the original ESPT work
                 has a few drawbacks. For example, its implementation
                 relies on a loadable kernel module (LKM) to manage the
                 shadow page table. Using LKMs is less desirable for
                 system virtual machines due to portability, security,
                 and maintainability concerns. Our work proposes a
                 different, yet more practical, implementation to
                 address these shortcomings. Instead of relying on LKMs,
                 our approach adopts a shared memory mapping scheme to
                 maintain the shadow page table (SPT) using only the
                 ``mmap'' system call. Furthermore, this work studies
                 SPT support for multi-processing in greater detail. It
                 devises three different SPT organizations and evaluates
                 their strengths and weaknesses with standard and real
                 Android applications on a system virtual machine that
                 emulates the Android/ARM platform on x86-64 systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
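%%% An illustrative user-level sketch (ours, heavily simplified) of
%%% the shared-memory-mapping mechanism the paper substitutes for a
%%% loadable kernel module: one file mapped twice with mmap, so
%%% updates through one view are visible through the other, as a
%%% shadow-page-table-like structure shared across emulator threads
%%% or processes requires.
%%%
%%%   #include <fcntl.h>
%%%   #include <sys/mman.h>
%%%   #include <unistd.h>
%%%
%%%   int main(void) {
%%%       int fd = open("/tmp/spt.bin", O_RDWR | O_CREAT, 0600);
%%%       if (fd < 0 || ftruncate(fd, 4096) != 0) return 1;
%%%       char *a = mmap(0, 4096, PROT_READ | PROT_WRITE,
%%%                      MAP_SHARED, fd, 0);
%%%       char *b = mmap(0, 4096, PROT_READ | PROT_WRITE,
%%%                      MAP_SHARED, fd, 0);
%%%       if (a == MAP_FAILED || b == MAP_FAILED) return 1;
%%%       a[0] = 42;               /* visible through b as well */
%%%       return b[0] == 42 ? 0 : 1;
%%%   }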
@Article{Kehne:2015:GEO,
author = "Jens Kehne and Jonathan Metter and Frank Bellosa",
title = "{GPUswap}: Enabling Oversubscription of {GPU} Memory
through Transparent Swapping",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "65--77",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731192",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the last few years, GPUs have been finding their
way into cloud computing platforms, allowing users to
benefit from the performance of GPUs at low cost.
However, a large portion of the cloud's cost advantage
traditionally stems from oversubscription: Cloud
providers rent out more resources to their customers
than are actually available, expecting that the
customers will not actually use all of the promised
resources. For GPU memory, this oversubscription is
difficult due to the lack of support for demand paging
in current GPUs. Therefore, recent approaches to
enabling oversubscription of GPU memory resort to
software scheduling of GPU kernels --- which has been
shown to induce significant runtime overhead in
applications even if sufficient GPU memory is available
--- to ensure that data is present on the GPU when
referenced. In this paper, we present GPUswap, a novel
approach to enabling oversubscription of GPU memory
that does not rely on software scheduling of GPU
kernels. GPUswap uses the GPU's ability to access
system RAM directly to extend the GPU's own memory. To
that end, GPUswap transparently relocates data from the
GPU to system RAM in response to memory pressure.
GPUswap ensures that all data is permanently accessible
to the GPU and thus allows applications to submit
commands to the GPU directly at any time, without the
need for software scheduling. Experiments with our
prototype implementation show that GPU applications can
still execute even with only 20 MB of GPU memory
available. In addition, while software scheduling
suffers from permanent overhead even with sufficient
GPU memory available, our approach executes GPU
applications with native performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Gupta:2015:HER,
author = "Vishal Gupta and Min Lee and Karsten Schwan",
title = "{HeteroVisor}: Exploiting Resource Heterogeneity to
Enhance the Elasticity of Cloud Platforms",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "79--92",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731191",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents HeteroVisor, a heterogeneity-aware
                 hypervisor that exploits resource heterogeneity to
enhance the elasticity of cloud systems. Introducing
the notion of 'elasticity' (E) states, HeteroVisor
permits applications to manage their changes in
resource requirements as state transitions that
implicitly move their execution among heterogeneous
platform components. Masking the details of platform
heterogeneity from virtual machines, the E-state
abstraction allows applications to adapt their resource
usage in a fine-grained manner via VM-specific
'elasticity drivers' encoding VM-desired policies. The
approach is explored for the heterogeneous processor
and memory subsystems evolving for modern server
platforms, leading to mechanisms that can manage these
heterogeneous resources dynamically and as required by
the different VMs being run. HeteroVisor is implemented
for the Xen hypervisor, with mechanisms that go beyond
core scaling to also deal with memory resources, via
the online detection of hot memory pages and
transparent page migration. Evaluation on an emulated
heterogeneous platform uses workload traces from
real-world data, demonstrating the ability to provide
high on-demand performance while also reducing resource
usage for these workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Wang:2015:DAA,
author = "Hui Wang and Canturk Isci and Lavanya Subramanian and
Jongmoo Choi and Depei Qian and Onur Mutlu",
title = "{A-DRM}: Architecture-aware Distributed Resource
Management of Virtualized Clusters",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "93--106",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731202",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract = "Virtualization technologies have been widely adopted by
large-scale cloud computing platforms. These
virtualized systems employ distributed resource
management (DRM) to achieve high resource utilization
and energy savings by dynamically migrating and
consolidating virtual machines. DRM schemes usually use
operating-system-level metrics, such as CPU
utilization, memory capacity demand and I/O
utilization, to detect and balance resource contention.
However, they are oblivious to microarchitecture-level
resource interference (e.g., memory bandwidth
contention between different VMs running on a host),
which is currently not exposed to the operating system.
We observe that the lack of visibility into
microarchitecture-level resource interference
significantly impacts the performance of virtualized
systems. Motivated by this observation, we propose a
                 novel architecture-aware DRM scheme (A-DRM) that takes
                 into account microarchitecture-level resource
                 interference when making migration decisions in a
                 virtualized cluster. A-DRM makes use of three core
techniques: (1) a profiler to monitor the
microarchitecture-level resource usage behavior online
for each physical host, (2) a memory bandwidth
interference model to assess the interference degree
among virtual machines on a host, and (3) a cost-benefit
analysis to determine a candidate virtual machine and a
host for migration. Real system experiments on thirty
randomly selected combinations of applications from the
CPU2006, PARSEC, STREAM, NAS Parallel Benchmark suites
                 in a four-host virtualized cluster show that A-DRM can
improve performance by up to 26.55\%, with an average
of 9.67\%, compared to traditional DRM schemes that
lack visibility into microarchitecture-level resource
utilization and contention.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Singh:2015:TVC,
author = "Rayman Preet Singh and Tim Brecht and S. Keshav",
title = "Towards {VM} Consolidation Using a Hierarchy of Idle
States",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "107--119",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731195",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract = "Typical VM consolidation approaches re-pack VMs into
                 fewer physical machines, resulting in energy and cost
                 savings [13, 19, 23, 40]. Recent work has explored a
                 just-in-time approach to VM consolidation by
                 transitioning VMs to an inactive state when idle and
                 activating them on the arrival of client requests [17,
                 21]. This leads to increased VM density at the cost of
                 an increase in client request latency (called the miss
                 penalty). The VM density so obtained, although greater,
                 is still limited by the number of VMs that can be
                 hosted in a single inactive state. If idle VMs were
                 hosted in multiple inactive states, VM density could be
                 increased further while ensuring small miss penalties.
                 However, VMs in different inactive states have
                 different capacities, activation times, and resource
                 requirements. Therefore, a key question is: How should
                 VMs be transitioned between different states to
                 minimize the expected miss penalty? This paper explores
                 the hosting of idle VMs in a hierarchy of multiple such
                 inactive states, and studies the effect of different
                 idle VM management policies on VM density and miss
                 penalties. We formulate a mathematical model for the
                 problem, and provide a theoretical lower bound on the
                 miss penalty. Using an off-the-shelf virtualization
                 solution (LXC [2]), we demonstrate how the required
                 model parameters can be obtained. We evaluate a variety
                 of policies and quantify their miss penalties for
                 different VM densities. We observe that some policies
                 consolidate up to 550 VMs per machine with average miss
                 penalties smaller than 1 ms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
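%%% A small worked example (our numbers, purely illustrative) of the
%%% expected-miss-penalty trade-off the paper's model captures: with
%%% probability p[i] that a request finds its idle VM in inactive
%%% state i, and activation time t[i], E[penalty] = sum_i p[i]*t[i].
%%%
%%%   #include <stdio.h>
%%%   int main(void) {
%%%       /* hypothetical states: running, paused, swapped, stopped */
%%%       double p[4] = { 0.70, 0.20, 0.08, 0.02 };
%%%       double t[4] = { 0.0, 0.5, 40.0, 900.0 };   /* ms */
%%%       double e = 0.0;
%%%       for (int i = 0; i < 4; i++) e += p[i] * t[i];
%%%       printf("expected miss penalty: %.2f ms\n", e);  /* 21.30 */
%%%       return 0;
%%%   }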
@Article{Kyle:2015:ADA,
author = "Stephen Kyle and Hugh Leather and Bj{\"o}rn Franke and
Dave Butcher and Stuart Monteith",
title = "Application of Domain-aware Binary Fuzzing to Aid
{Android} Virtual Machine Testing",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "121--132",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731198",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "The development of a new application virtual machine
(VM), like the creation of any complex piece of
software, is a bug-prone process. In version 5.0, the
widely-used Android operating system has changed from
the Dalvik VM to the newly-developed ART VM to execute
Android applications. As new iterations of this VM are
released, how can the developers aim to reduce the
number of potentially security-threatening bugs that
make it into the final product? In this paper we
combine domain-aware binary fuzzing and differential
testing to produce DexFuzz, a tool that exploits the
presence of multiple modes of execution within a VM to
test for defects. These modes of execution include the
interpreter and a runtime that executes ahead-of-time
compiled code. We find and present a number of bugs in
the in-development version of ART in the Android Open
Source Project. We also assess DexFuzz's ability to
highlight defects in the experimental version of ART
released in the previous version of Android, 4.4,
finding 189 crashing programs and 15 divergent programs
that indicate defects after only 5,000 attempts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
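%%% The mutation step of a binary fuzzer, reduced to a generic sketch
%%% (ours, not DexFuzz itself, whose domain-aware mutations keep the
%%% input structurally valid): flip random bits in a candidate input
%%% before running it under multiple execution modes and flagging
%%% divergence.
%%%
%%%   #include <stdlib.h>
%%%   void mutate(unsigned char *buf, size_t len, int nflips) {
%%%       for (int i = 0; i < nflips && len > 0; i++)
%%%           buf[rand() % len] ^= (unsigned char)(1u << (rand() % 8));
%%%   }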
@Article{Suneja:2015:EVI,
author = "Sahil Suneja and Canturk Isci and Eyal de Lara and
Vasanth Bala",
title = "Exploring {VM} Introspection: Techniques and
Trade-offs",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "133--146",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731196",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "While there are a variety of existing virtual machine
introspection (VMI) techniques, their latency,
overhead, complexity and consistency trade-offs are not
clear. In this work, we address this gap by first
organizing the various existing VMI techniques into a
taxonomy based upon their operational principles, so
that they can be put into context. Next we perform a
thorough exploration of their trade-offs both
qualitatively and quantitatively. We present a
comprehensive set of observations and best practices
for efficient, accurate and consistent VMI operation
based on our experiences with these techniques. Our
                 results show a stunning range of variation in
                 performance, complexity, and overhead across different
                 VMI techniques. We further present a deep dive into VMI
                 consistency aspects to understand the sources of
                 inconsistency in observed VM state, and show that,
                 contrary to common expectation, pause-and-introspect
                 based VMI techniques do very little to improve
                 consistency despite their substantial performance
                 impact.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Zeng:2015:PPH,
author = "Junyuan Zeng and Yangchun Fu and Zhiqiang Lin",
title = "{PEMU}: a Pin Highly Compatible Out-of-{VM} Dynamic
Binary Instrumentation Framework",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "147--160",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731201",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Over the past 20 years, we have witnessed a widespread
adoption of dynamic binary instrumentation (DBI) for
numerous program analyses and security applications
including program debugging, profiling, reverse
engineering, and malware analysis. To date, there are
many DBI platforms, and the most popular one is Pin,
which provides various instrumentation APIs for process
instrumentation. However, Pin does not support the
instrumentation of OS kernels. In addition, the
execution of the instrumentation and analysis routine
is always inside the virtual machine (VM).
Consequently, it cannot support any out-of-VM
introspection that requires strong isolation.
Therefore, this paper presents PEMU, a new open source
DBI framework that is compatible with Pin-APIs, but
supports out-of-VM introspection for both user level
processes and OS kernels. Unlike in-VM instrumentation
in which there is no semantic gap, for out-of-VM
introspection we have to bridge the semantic gap and
provide abstractions (i.e., APIs) for programmers. One
important feature of PEMU is its API compatibility with
Pin. As such, many Pin plugins are able to execute atop
PEMU without any source code modification. We have
implemented PEMU, and our experimental results with the
SPEC 2006 benchmarks show that PEMU introduces
reasonable overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Jaffer:2015:IRD,
author = "Shehbaz Jaffer and Piyus Kedia and Sorav Bansal",
title = "Improving Remote Desktopping Through Adaptive
Record\slash Replay",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "161--172",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731193",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract = "Accessing the display of a computer remotely is
                 popularly called remote desktopping. Remote desktopping
                 software is installed at both the user-facing client
                 computer and the remote server computer; it simulates
                 the user's input events at the server, and streams the
                 corresponding display changes to the client, thus
                 providing the user with the illusion of controlling the
                 remote machine using local input devices (e.g.,
                 keyboard/mouse). Many such remote desktopping tools are
                 widely used. We show that if the remote server is a
                 virtual machine (VM) and the client is reasonably
                 powerful (e.g., current laptop- and desktop-grade
                 hardware), VM deterministic replay capabilities can be
                 used adaptively to significantly reduce the network
                 bandwidth consumption and server-side CPU utilization
                 of a remote desktopping tool. We implement these
                 optimizations in a tool based on the Qemu/KVM
                 virtualization platform and the VNC remote desktopping
                 platform. Our tool reduces VNC's network bandwidth
                 consumption by up to 9x and server-side CPU utilization
                 by up to 56\% for popular graphics-intensive
                 applications. On the flip side, our techniques consume
                 more CPU/memory/disk resources at the client. The
                 effect of our optimizations on user-perceived latency
                 is negligible.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Oh:2015:MWA,
author = "JinSeok Oh and Jin-woo Kwon and Hyukwoo Park and
Soo-Mook Moon",
title = "Migration of {Web} Applications with Seamless
Execution",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "173--185",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731197",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Web applications (apps) are programmed using HTML5,
CSS, and JavaScript, and are distributed in the source
                 code format. Web apps can be executed on any device
                 where a web browser is installed, allowing a one-source,
                 multi-platform environment. We can exploit this
advantage of platform independence for a new user
experience called app migration, which allows migrating
an app in the middle of execution seamlessly between
smart devices. This paper proposes such a migration
framework for web apps where we can save the current
state of a running app and resume its execution on a
different device by restoring the saved state. We save
the web app's state in the form of a snapshot, which is
actually another web app whose execution can restore
                 the saved state. In the snapshot, the states of the
                 JavaScript variables and the DOM trees are saved using
                 the JSON format. We solved some of the saving/restoring
problems related to event handlers and closures by
accessing the browser and the JavaScript engine
internals. Our framework does not require instrumenting
an app or changing its source code, but works for the
original app. We implemented the framework on the
Chrome browser with the V8 JavaScript engine and
successfully migrated non-trivial sample apps with
reasonable saving and restoring overhead. We also
                 discuss other uses of the snapshot for optimizations
and user experiences for the web platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Ren:2015:ASE,
author = "Jianbao Ren and Yong Qi and Yuehua Dai and Xiaoguang
Wang and Yi Shi",
title = "{AppSec}: a Safe Execution Environment for Security
Sensitive Applications",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "187--199",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731199",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract = "A malicious OS kernel can easily access a user's
                 private data in main memory and pry into human-machine
                 interaction data, even on a system that employs privacy
                 enforcement at the application or OS level. This paper
                 introduces AppSec, a hypervisor-based safe execution
                 environment, to transparently protect both the memory
                 data and the human-machine interaction data of
                 security-sensitive applications from an untrusted OS.
                 AppSec provides several security mechanisms on an
                 untrusted OS. AppSec introduces a safe loader to check
                 the code integrity of the application and its dynamic
                 shared objects. During runtime, AppSec protects the
                 application and dynamic shared objects from being
                 modified, and verifies kernel memory accesses according
                 to the application's intention. AppSec provides a
                 device isolation mechanism to prevent human-machine
                 interaction devices from being accessed by a
                 compromised kernel. On top of that, AppSec further
                 provides a privilege-based window system to protect the
                 application's X resources. The major advantages of
                 AppSec are threefold. First, AppSec verifies and
                 protects all dynamic shared objects during runtime.
                 Second, AppSec mediates kernel memory access according
                 to the application's intention rather than coarsely
                 encrypting all of the application's data. Third, AppSec
                 provides a trusted I/O path from the end-user to the
                 application. A prototype of AppSec has been implemented
                 and shows that AppSec is efficient and practical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Jin:2015:HAS,
author = "Seongwook Jin and Jinho Seol and Jaehyuk Huh and
Seungryoul Maeng",
title = "Hardware-Assisted Secure Resource Accounting under a
Vulnerable Hypervisor",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "201--213",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731203",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "With the proliferation of cloud computing to outsource
computation in remote servers, the accountability of
computational resources has emerged as an important new
                 challenge for both cloud users and providers. Among
                 cloud resources, the actual allocations of CPU and
                 memory are difficult to verify, since current
                 virtualization techniques attempt to hide the
                 discrepancy between physical and virtual allocations
                 for the two resources. This paper proposes an online
verifiable resource accounting technique for CPU and
memory allocation for cloud computing. Unlike prior
approaches for cloud resource accounting, the proposed
accounting mechanism, called Hardware-assisted Resource
Accounting (HRA), uses the hardware support for system
management mode (SMM) and virtualization to provide
secure resource accounting, even if the hypervisor is
compromised. Using a secure isolated execution support
of SMM, this study investigates two aspects of
verifiable resource accounting for cloud systems.
First, this paper presents how the hardware-assisted
SMM and virtualization techniques can be used to
implement the secure resource accounting mechanism even
under a compromised hypervisor. Second, the paper
investigates a sample-based resource accounting
technique to minimize performance overheads. Using a
statistical random sampling method, the technique
estimates the overall CPU and memory allocation status
with 99\%--100\% accuracy and performance degradation
of only 0.1\%--0.5\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
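
The sample-based accounting idea lends itself to a small illustration. The sketch below is ours, not HRA's code, and every name in it is invented: each observation stands in for an SMM-triggered check of whether a virtual resource is actually backed by a physical one at a random instant, and the binomial confidence bound explains why a few thousand samples already give the accuracy range quoted above.

// Illustrative sketch of sample-based resource accounting (ours, not HRA's
// code). Each Bernoulli draw stands in for one SMM-triggered observation of
// whether a vCPU or page is physically backed at a random instant; the
// allocation fraction is estimated from the sample mean.
#include <cmath>
#include <cstdio>
#include <random>

int main() {
    std::mt19937 gen(42);
    std::bernoulli_distribution backed(0.75);   // hidden ground truth

    const int samples = 10000;
    int hits = 0;
    for (int i = 0; i < samples; ++i)
        if (backed(gen)) ++hits;                // one sampled observation

    double p = double(hits) / samples;
    // 95% normal-approximation confidence half-width of the estimate.
    double half = 1.96 * std::sqrt(p * (1.0 - p) / samples);
    std::printf("estimated allocation: %.3f +/- %.3f\n", p, half);
}
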
@Article{Cui:2015:PPA,
author = "Lei Cui and Tianyu Wo and Bo Li and Jianxin Li and Bin
Shi and Jinpeng Huai",
title = "{PARS}: a Page-Aware Replication System for
Efficiently Storing Virtual Machine Snapshots",
journal = j-SIGPLAN,
volume = "50",
number = "7",
pages = "215--228",
month = jul,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2817817.2731190",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Virtual machine (VM) snapshot enhances the system
availability by saving the running state into stable
storage during failure-free execution and rolling back
to the snapshot point upon failures. Unfortunately, the
snapshot state may be lost due to disk failures, so
that the VM fails to be recovered. The popular
distributed file systems employ replication technique
to tolerate disk failures by placing redundant copies
across dispersed disks. However, unless user-specific
personalization is provided, these systems treat the
data in a file as equally important and create
identical copies of the entire file, leading to
non-trivial additional storage overhead. This paper
proposes a page-aware replication system (PARS) to
store VM snapshots efficiently. PARS employs VM
introspection to determine how each page is used by
the guest, and classifies pages by their importance
to system execution. If a page is critical, PARS
replicates it in multiple copies to ensure high
availability and long-term durability. Otherwise, the
loss of the page does not prevent the system from
working properly, so PARS saves only one copy of the
page. Consequently, PARS improves storage efficiency
without compromising availability. We have implemented
PARS to justify its practicality. The experimental
results demonstrate that PARS achieves 53.9\% space
saving compared to the native replication approach in
HDFS which replicates the whole snapshot file fully and
identically.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '15 conference proceedings.",
}
@Article{Gramoli:2015:MTY,
author = "Vincent Gramoli",
title = "More than you ever wanted to know about
synchronization: synchrobench, measuring the impact of
the synchronization on concurrent algorithms",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "1--10",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688501",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we present the most extensive
comparison of synchronization techniques. We evaluate 5
different synchronization techniques through a series
of 31 data structure algorithms from the recent
literature on 3 multicore platforms from Intel, Sun
Microsystems and AMD. To this end, we developed in
C/C++ and Java a new micro-benchmark suite, called
Synchrobench, hence helping the community evaluate new
data structures and synchronization techniques. The
main conclusion of this evaluation is threefold: (i)
although compare-and-swap helps achieve the best
performance on multicores, doing so correctly is hard;
(ii) optimistic locking offers varying performance
results while transactional memory offers more
consistent results; and (iii) copy-on-write and
read-copy-update suffer more from contention than any
other technique but could be combined with others to
derive efficient algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
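
The first conclusion above, that compare-and-swap wins on performance but "doing so correctly is hard", has a standard concrete picture. The generic C++11 sketch below is ours, not Synchrobench code; the easily botched detail is that a failed CAS must retry against the value it just observed rather than a stale snapshot.

// Canonical compare-and-swap retry loop (generic example, not Synchrobench
// code). On failure, compare_exchange_weak reloads `expected` with the value
// that beat us, so each retry works from fresh state. For a plain counter,
// fetch_add would suffice; the loop generalizes to arbitrary read-modify-write.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

std::atomic<long> counter{0};

void add(long delta) {
    long expected = counter.load(std::memory_order_relaxed);
    while (!counter.compare_exchange_weak(expected, expected + delta,
                                          std::memory_order_acq_rel,
                                          std::memory_order_relaxed)) {
        // `expected` now holds the current value; just try again.
    }
}

int main() {
    std::vector<std::thread> ts;
    for (int i = 0; i < 4; ++i)
        ts.emplace_back([] { for (int j = 0; j < 100000; ++j) add(1); });
    for (auto& t : ts) t.join();
    std::printf("%ld\n", counter.load());       // prints 400000
}
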
@Article{Alistarh:2015:SSR,
author = "Dan Alistarh and Justin Kopinsky and Jerry Li and Nir
Shavit",
title = "The {SprayList}: a scalable relaxed priority queue",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "11--20",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688523",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "High-performance concurrent priority queues are
essential for applications such as task scheduling and
discrete event simulation. Unfortunately, even the best
performing implementations do not scale past a number
of threads in the single digits. This is because of the
sequential bottleneck in accessing the elements at the
head of the queue in order to perform a DeleteMin
operation. In this paper, we present the SprayList, a
scalable priority queue with relaxed ordering
semantics. Starting from a non-blocking SkipList, the
main innovation behind our design is that the DeleteMin
operations avoid a sequential bottleneck by
``spraying'' themselves onto the head of the SkipList
in a coordinated fashion. The spraying is
implemented using a carefully designed random walk, so
that DeleteMin returns an element among the first $O(p
\log^3 p)$ in the list, with high probability, where $p$ is
the number of threads. We prove that the running time
of a DeleteMin operation is $O(\log^3 p)$, with high
probability, independent of the size of the list. Our
experiments show that the relaxed semantics allow the
data structure to scale for high thread counts,
comparable to a classic unordered SkipList.
Furthermore, we observe that, for reasonably parallel
workloads, the scalability benefits of relaxation
considerably outweigh the additional work due to
out-of-order execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
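
A loose rendering of the "spray" may help; the toy below is ours and simplifies the paper's carefully tuned walk (its heights, jump lengths, and skiplist traversal are all more precise). A deleter descends about log2(p) levels and takes a uniformly random number of steps at each, and since a level-l step in a skiplist skips roughly 2^l nodes, concurrent deleters land spread across O(p polylog p) positions near the head instead of all contending for the minimum.

// Toy "spray" over array indices standing in for skiplist positions (ours,
// not the authors' code; parameters are simplified from the paper).
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <random>
#include <vector>

int spray_index(int p, std::mt19937& gen) {
    int height = std::max(1, (int)std::log2((double)p));
    std::uniform_int_distribution<int> jump(0, height);  // O(log p) steps/level
    int idx = 0;
    for (int level = height; level >= 0; --level)
        idx += jump(gen) << level;   // a level-l step skips ~2^l elements
    return idx;
}

int main() {
    std::mt19937 gen(1);
    std::vector<int> hist(64, 0);
    for (int i = 0; i < 1000; ++i)
        ++hist[std::min(63, spray_index(64, gen))];      // p = 64 deleters
    for (int i = 0; i < 16; ++i)
        std::printf("slot %2d hit %d times\n", i, hist[i]);
}
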
@Article{Arbel:2015:PRR,
author = "Maya Arbel and Adam Morrison",
title = "Predicate {RCU}: an {RCU} for scalable concurrent
updates",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "21--30",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Read-copy update (RCU) is a shared memory
synchronization mechanism with scalable
synchronization-free reads that nevertheless execute
correctly with concurrent updates. To guarantee the
consistency of such reads, an RCU update transitioning
the data structure between certain states must wait for
the completion of all existing reads. Unfortunately,
these waiting periods quickly become a bottleneck, and
thus RCU remains unused in data structures that require
scalable, fine-grained, update operations. To solve
this problem, we present Predicate RCU (PRCU), an RCU
variant in which an update waits only for the reads
whose consistency it affects, which are specified by a
user-supplied predicate. We explore the trade-offs in
implementing PRCU, describing implementations that
reduce wait times by 10--100x with varying overhead on
reads on modern x86 multiprocessor machines. We
demonstrate the applicability of PRCU by applying it to
two RCU-based concurrent algorithms---the Citrus binary
search tree and a resizable hash table---and show
experimentally that PRCU significantly improves the
performance of both algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
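
The waiting rule is the heart of PRCU and can be sketched in a few lines. The structure below is our invention (the paper's implementations are considerably more refined and handle grace-period races this toy ignores): each reader publishes the key it is reading, and an updater waits only for in-flight readers whose key satisfies the supplied predicate.

// Minimal sketch of the Predicate RCU waiting rule (ours, not the paper's
// code). Slot key -1 means the reader is idle.
#include <atomic>
#include <functional>
#include <thread>

struct PrcuSketch {
    static constexpr int kSlots = 64;           // one slot per reader thread
    std::atomic<long> key[kSlots];

    PrcuSketch() { for (auto& k : key) k.store(-1); }

    void read_lock(int slot, long k) { key[slot].store(k); }
    void read_unlock(int slot)       { key[slot].store(-1); }

    // Unlike a classic synchronize_rcu(), which waits for *all* readers,
    // wait only for readers whose published key the update affects.
    void wait_for_readers(const std::function<bool(long)>& affected) {
        for (int s = 0; s < kSlots; ++s) {
            long k = key[s].load();
            while (k != -1 && affected(k)) {    // affected reader in flight
                std::this_thread::yield();
                k = key[s].load();
            }
        }
    }
};
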
@Article{Golan-Gueta:2015:ASA,
author = "Guy Golan-Gueta and G. Ramalingam and Mooly Sagiv and
Eran Yahav",
title = "Automatic scalable atomicity via semantic locking",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "31--41",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688511",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we consider concurrent programs in
which the shared state consists of instances of
linearizable ADTs (abstract data types). We present an
automated approach to concurrency control that
addresses a common need: the need to atomically execute
a code fragment, which may contain multiple ADT
operations on multiple ADT instances. We present a
synthesis algorithm that automatically enforces
atomicity of given code fragments (in a client program)
by inserting pessimistic synchronization that
guarantees atomicity and deadlock-freedom (without
using any rollback mechanism). Our algorithm takes a
commutativity specification as an extra input. This
specification indicates for every pair of ADT
operations the conditions under which the operations
commute. Our algorithm enables greater parallelism by
permitting commuting operations to execute
concurrently. We have implemented the synthesis
algorithm in a Java compiler, and applied it to several
Java programs. Our results show that our approach
produces efficient and scalable synchronization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Benson:2015:FPP,
author = "Austin R. Benson and Grey Ballard",
title = "A framework for practical parallel fast matrix
multiplication",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "42--53",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688513",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Matrix multiplication is a fundamental computation in
many scientific disciplines. In this paper, we show
that novel fast matrix multiplication algorithms can
significantly outperform vendor implementations of the
classical algorithm and Strassen's fast algorithm on
modest problem sizes and shapes. Furthermore, we show
that the best choice of fast algorithm depends not only
on the size of the matrices but also the shape. We
develop a code generation tool to automatically
implement multiple sequential and shared-memory
parallel variants of each fast algorithm, including our
novel parallelization scheme. This allows us to rapidly
benchmark over 20 fast algorithms on several problem
sizes. Furthermore, we discuss a number of practical
implementation issues for these algorithms on
shared-memory machines that can direct further research
on making fast algorithms practical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "AMD Core Math Library (ACML); Cray Scientific Library
(LibSci); IBM Engineering and Scientific Subroutine
Library (ESSL); Intel MKL; LINPACK benchmark; numerical
instability of $O(N^p)$ algorithms with $p < 3$;
Strassen matrix multiplication; Strassen--Winograd
algorithm",
remark = "PPoPP '15 conference proceedings.",
}
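
For background, the asymptotic gap these fast algorithms exploit follows from textbook divide-and-conquer recurrences (a standard derivation, not a result of the paper):

% Classical blocked multiplication: eight half-size products.
T(n) = 8\,T(n/2) + \Theta(n^2) \;\Longrightarrow\; T(n) = \Theta(n^{\log_2 8}) = \Theta(n^3).
% Strassen trades one multiplication for extra additions: seven products.
T(n) = 7\,T(n/2) + \Theta(n^2) \;\Longrightarrow\; T(n) = \Theta(n^{\log_2 7}) \approx \Theta(n^{2.81}).
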
@Article{Acharya:2015:PNC,
author = "Aravind Acharya and Uday Bondhugula",
title = "{PLUTO+}: near-complete modeling of affine
transformations for parallelism and locality",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "54--64",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688512",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Affine transformations have proven to be very powerful
for loop restructuring due to their ability to model a
very wide range of transformations. A single
multi-dimensional affine function can represent a long
and complex sequence of simpler transformations.
Existing affine transformation frameworks like the
Pluto algorithm, which include a cost function for
modern multicore architectures where coarse-grained
parallelism and locality are crucial, consider only a
sub-space of transformations to avoid a combinatorial
explosion in the search. The ensuing
practical trade-offs lead to the exclusion of certain
useful transformations, in particular, transformation
compositions involving loop reversals and loop skewing
by negative factors. In this paper, we propose an
approach to address this limitation by modeling a much
larger space of affine transformations in conjunction
with the Pluto algorithm's cost function. We perform an
experimental evaluation of both the effect on
compilation time and the performance of generated code.
The evaluation shows that our new framework, Pluto+,
provides no degradation in performance in any of the
Polybench benchmarks. For Lattice Boltzmann Method
(LBM) codes with periodic boundary conditions, it
provides a mean speedup of 1.33x over Pluto. We also
show that Pluto+ does not increase compile times
significantly. Experimental results on Polybench show
that Pluto+ increases overall polyhedral
source-to-source optimization time by only 15\%. In
cases where it improves execution time significantly,
it increases polyhedral optimization time by only
2.04x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
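
The excluded compositions the abstract mentions are easiest to see on a hand-worked instance; the one below is ours, not an example from the paper. A periodic stencil that streams values rightward carries a wraparound dependence that no non-negative constant skew can satisfy, while a skew by a negative factor (equivalently, an inner-loop reversal followed by a positive skew) makes all dependence components non-negative and thus makes rectangular tiling legal.

// Hand-worked instance (ours, not from the paper). The original loop nest
//
//   for (t = 0; t < T; ++t)
//     for (i = 0; i < N; ++i)
//       A[t+1][i] = 0.5 * (A[t][i] + A[t][(i - 1 + N) % N]);
//
// has dependence distance vectors (1,0), (1,1) and the wraparound
// (1,-(N-1)). The affine map (c1, c2) = (t, t - i), a skew with coefficient
// -1 on i, sends them to (1,1), (1,0) and (1,N): every component is
// non-negative, so the (c1, c2) space may legally be tiled rectangularly.
#include <vector>

void periodic_stream_skewed(std::vector<std::vector<double>>& A, int T, int N) {
    // A must provide at least T+1 rows of N columns.
    for (int c1 = 0; c1 < T; ++c1)
        for (int c2 = c1 - N + 1; c2 <= c1; ++c2) {
            int t = c1, i = c1 - c2;             // invert the affine map
            A[t + 1][i] = 0.5 * (A[t][i] + A[t][(i - 1 + N) % N]);
        }
}
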
@Article{Ravishankar:2015:DMC,
author = "Mahesh Ravishankar and Roshan Dathathri and Venmugil
Elango and Louis-No{\"e}l Pouchet and J. Ramanujam and
Atanas Rountev and P. Sadayappan",
title = "Distributed memory code generation for mixed
irregular\slash regular computations",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "65--75",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688515",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many applications feature a mix of irregular and
regular computational structures. For example, codes
using adaptive mesh refinement (AMR) typically use a
collection of regular blocks, where the number of
blocks and the relationship between blocks is
irregular. The computational structure in such
applications generally involves regular (affine) loop
computations within some number of innermost loops,
while outer loops exhibit irregularity due to
data-dependent control flow and indirect array access
patterns. Prior approaches to distributed memory
parallelization do not handle such computations
effectively. They either target loop nests that are
completely affine using polyhedral frameworks, or treat
all loops as irregular. Consequently, the generated
distributed memory code contains artifacts that disrupt
the regular nature of previously affine innermost loops
of the computation. This hampers subsequent
optimizations to improve on-node performance. We
propose a code generation framework that can
effectively transform such applications for execution
on distributed memory systems. Our approach generates
distributed memory code which preserves program
properties that enable subsequent polyhedral
optimizations. Simultaneously, it addresses a major
memory bottleneck of prior techniques that limits the
scalability of the generated code. The effectiveness of
the proposed framework is demonstrated on computations
that are mixed regular/irregular, completely regular,
and completely irregular.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Xiang:2015:SPH,
author = "Lingxiang Xiang and Michael L. Scott",
title = "Software partitioning of hardware transactions",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "76--86",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688506",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Best-effort hardware transactional memory (HTM) allows
complex operations to execute atomically and in
parallel, so long as hardware buffers do not overflow,
and conflicts are not encountered with concurrent
operations. We describe a programming technique and
compiler support to reduce both overflow and conflict
rates by partitioning common operations into
read-mostly (planning) and write-mostly (completion)
operations, which then execute separately. The
completion operation remains transactional; planning
can often occur in ordinary code. High-level (semantic)
atomicity for the overall operation is ensured by
passing an application-specific validator object
between planning and completion. Transparent
composition of partitioned operations is made possible
through fully-automated compiler support, which
migrates all planning operations out of the parent
transaction while respecting all program data flow and
dependences. For both micro- and macro-benchmarks,
experiments on IBM z-Series and Intel Haswell machines
demonstrate that partitioning can lead to dramatically
lower abort rates and higher scalability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
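
The planning/completion split is easy to render generically. The sketch below is ours, not the paper's API: a sorted-list insert is partitioned into a read-mostly plan that runs in ordinary code and a short write-mostly completion guarded by a validator, with a mutex standing in for the best-effort hardware transaction a real system would attempt first.

// Sketch of software-partitioned transactions (ours, not the paper's code).
#include <algorithm>
#include <cstddef>
#include <mutex>
#include <vector>

struct SortedList {
    std::vector<int> v;
    std::mutex htm_standin;   // placeholder for a best-effort HTM region

    struct Plan { std::size_t pos; int value; };

    // Read-mostly planning phase: runs outside any transaction.
    Plan plan_insert(int value) {
        std::size_t pos =
            std::lower_bound(v.begin(), v.end(), value) - v.begin();
        return {pos, value};
    }

    // Write-mostly completion: validate the plan, then mutate.
    bool complete_insert(const Plan& p) {
        std::lock_guard<std::mutex> tx(htm_standin);
        bool valid = p.pos <= v.size()
                  && (p.pos == 0 || v[p.pos - 1] <= p.value)
                  && (p.pos == v.size() || p.value <= v[p.pos]);
        if (!valid) return false;                // caller re-plans
        v.insert(v.begin() + p.pos, p.value);
        return true;
    }
};

void insert(SortedList& l, int value) {
    while (!l.complete_insert(l.plan_insert(value))) { /* re-plan, retry */ }
}
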
@Article{Baldassin:2015:PID,
author = "Alexandro Baldassin and Edson Borin and Guido Araujo",
title = "Performance implications of dynamic memory allocators
on transactional memory systems",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "87--96",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688504",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Although dynamic memory management accounts for a
significant part of the execution time on many modern
software systems, its impact on the performance of
transactional memory systems has been mostly
overlooked. In order to shed some light on this
subject, this paper conducts a thorough investigation
of the interplay between memory allocators and software
transactional memory (STM) systems. We show that
allocators can interfere with the way memory addresses
are mapped to versioned locks on state-of-the-art
software transactional memory implementations.
Moreover, we observed that key aspects of allocators
such as false sharing avoidance, scalability, and
locality have a drastic impact on the final
performance. For instance, we have detected performance
differences of up to 171\% in the STAMP applications
when using distinct allocators. Moreover, we show that
optimizations at the STM-level (such as caching
transactional objects) are not effective when a modern
allocator is already in use. All in all, our study
highlights the importance of reporting the allocator
utilized in the performance evaluation of transactional
memory systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
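
The interference mechanism described above, allocators changing how addresses map to versioned locks, is visible in the address-hashing scheme most word-based STMs share. The sketch below is generic, tied to no particular STM: two unrelated objects that the allocator happens to place at colliding addresses share a lock and produce false conflicts, so allocator alignment and locality decisions directly shape STM behavior.

// Generic address-to-versioned-lock mapping of the kind used by word-based
// STMs (our sketch, not any particular system's code).
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr std::size_t kLocks = 1 << 20;          // power-of-two lock table

std::size_t lock_index(const void* addr) {
    auto a = reinterpret_cast<std::uintptr_t>(addr);
    return (a >> 4) & (kLocks - 1);              // drop low bits, then mask
}

int main() {
    int x = 0, y = 0;
    std::printf("lock(x) = %zu, lock(y) = %zu\n",
                lock_index(&x), lock_index(&y)); // collision => false conflict
}
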
@Article{Zhang:2015:LOS,
author = "Minjia Zhang and Jipeng Huang and Man Cao and Michael
D. Bond",
title = "Low-overhead software transactional memory with
progress guarantees and strong semantics",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "97--108",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688510",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software transactional memory offers an appealing
alternative to locks by improving programmability,
reliability, and scalability. However, existing STMs
are impractical because they add high instrumentation
costs and often provide weak progress guarantees and/or
semantics. This paper introduces a novel STM called
LarkTM that provides three significant features. (1)
Its instrumentation adds low overhead except when
accesses actually conflict, enabling low single-thread
overhead and scaling well on low-contention workloads.
(2) It uses eager concurrency control mechanisms, yet
naturally supports flexible conflict resolution,
enabling strong progress guarantees. (3) It naturally
provides strong atomicity semantics at low cost.
LarkTM's design works well for low-contention
workloads, but adds significant overhead under higher
contention, so we design an adaptive version of LarkTM
that uses alternative concurrency control for
high-contention objects. An implementation and
evaluation in a Java virtual machine show that the
basic and adaptive versions of LarkTM not only provide
low single-thread overhead, but their multithreaded
performance compares favorably with existing
high-performance STMs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Chabbi:2015:BEP,
author = "Milind Chabbi and Wim Lavrijsen and Wibe de Jong and
Koushik Sen and John Mellor-Crummey and Costin Iancu",
title = "Barrier elision for production parallel programs",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "109--119",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688502",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large scientific code bases are often composed of
several layers of runtime libraries, implemented in
multiple programming languages. In such situations,
programmers often choose conservative synchronization
patterns, leading to suboptimal performance. In this
paper, we present context-sensitive dynamic
optimizations that elide barriers that prove redundant
during program execution. In our technique, we perform
data race detection alongside the program to identify
redundant barriers in their calling contexts; after an
initial learning, we start eliding all future instances
of barriers occurring in the same calling context. We
present an automatic on-the-fly optimization and a
multi-pass guided optimization. We apply our techniques
to NWChem--a 6 million line computational chemistry
code written in C/C++/Fortran that uses several runtime
libraries such as Global Arrays, ComEx, DMAPP, and MPI.
Our technique elides a surprisingly high fraction of
barriers (as many as 63\%) in production runs. This
redundancy elimination translates to application
speedups as high as 14\% on 2048 cores. Our techniques
also provided valuable insight about the application
behavior, later used by NWChem developers. Overall, we
demonstrate the value of holistic context-sensitive
analyses that consider the domain science in
conjunction with the associated runtime software
stack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Thebault:2015:SEI,
author = "Lo{\"\i}c Th{\'e}bault and Eric Petit and Quang Dinh",
title = "Scalable and efficient implementation of {$3$D}
unstructured meshes computation: a case study on matrix
assembly",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "120--129",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688517",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Exposing massive parallelism on 3D unstructured meshes
computation with efficient load balancing and minimal
synchronizations is challenging. Current approaches
relying on domain decomposition and mesh coloring
struggle to scale with the increasing number of cores
per node, especially with new many-core processors. In
this paper, we propose a hybrid approach using domain
decomposition to exploit distributed memory
parallelism, Divide-and-Conquer, D{\&}C, to exploit
shared memory parallelism and improve locality, and
mesh coloring at core level to exploit vectors. It
illustrates a new trade-off for many-cores between
structuredness, memory locality, and vectorization. We
evaluate our approach on the finite element matrix
assembly of an industrial fluid dynamic code developed
by Dassault Aviation. We compare our D{\&}C approach to
domain decomposition and to mesh coloring. D{\&}C
achieves a high parallel efficiency, a good data
locality as well as an improved bandwidth usage. It
competes on current nodes with the optimized pure MPI
version with a minimum 10\% speed-up. D{\&}C shows an
impressive 319x strong scaling on 512 cores (32 nodes)
with only 2000 vertices per core. Finally, the Intel
Xeon Phi version has a performance similar to 10 Intel
E5-2665 Xeon Sandy Bridge cores and 95\% parallel
efficiency on the 60 physical cores. Running on 4 Xeon
Phi (240 cores), D{\&}C has 92\% efficiency on the
physical cores and performance similar to 33 Intel
E5-2665 Xeon Sandy Bridge cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Tallent:2015:DCS,
author = "Nathan R. Tallent and Abhinav Vishnu and Hubertus {Van
Dam} and Jeff Daily and Darren J. Kerbyson and Adolfy
Hoisie",
title = "Diagnosing the causes and severity of one-sided
message contention",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "130--139",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688516",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Two trends suggest network contention for one-sided
messages is poised to become a performance problem that
concerns application developers: an increased interest
in one-sided programming models and a rising ratio of
hardware threads to network injection bandwidth. Often
it is difficult to reason about when one-sided tasks
decrease or increase network contention. We present
effective and portable techniques for diagnosing the
causes and severity of one-sided message contention. To
detect that a message is affected by contention, we
maintain statistics representing instantaneous network
resource demand. Using lightweight measurement and
modeling, we identify the portion of a message's
latency that is due to contention and whether
contention occurs at the initiator or target. We
attribute these metrics to program statements in their
full static and dynamic context. We characterize
contention for an important computational chemistry
benchmark on InfiniBand, Cray Aries, and IBM Blue
Gene/Q interconnects. We pinpoint the sources of
contention, estimate their severity, and show that when
message delivery time deviates from an ideal model,
there are other messages contending for the same
network links. With a small change to the benchmark, we
reduce contention by 50\% and improve total runtime by
20\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Chang:2015:PAG,
author = "Yen-Jung Chang and Vijay K. Garg",
title = "A parallel algorithm for global states enumeration in
concurrent systems",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "140--149",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688520",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Verifying the correctness of the executions of a
concurrent program is difficult because of its
nondeterministic behavior. One of the verification
methods is predicate detection, which predicts whether
a user-specified condition (predicate) could become
true in any global state of the program. The method is
predictive because it generates inferred execution
paths from the observed execution path and then checks
the predicate on the global states of inferred paths.
One important part of predicate detection is global
states enumeration, which generates the global states
on inferred paths. Cooper and Marzullo gave the first
enumeration algorithm based on a breadth-first strategy
(BFS). Later, many algorithms have been proposed to
improve space and time complexity. None of them,
however, takes parallelism into consideration. In this
paper, we present the first parallel and online
algorithm, named ParaMount, for global state
enumeration. Our experimental results show that
ParaMount speeds up the existing sequential algorithms
by a factor of 6 with 8 threads. We have implemented an
online predicate detector using ParaMount. For
predicate detection, our detector based on ParaMount is
10 to 50 times faster than RV runtime (a verification
tool that uses Cooper and Marzullo's BFS enumeration
algorithm).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Cogumbreiro:2015:DDV,
author = "Tiago Cogumbreiro and Raymond Hu and Francisco Martins
and Nobuko Yoshida",
title = "Dynamic deadlock verification for general barrier
synchronisation",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "150--160",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688519",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Armus, a dynamic verification tool for
deadlock detection and avoidance specialised in barrier
synchronisation. Barriers are used to coordinate the
execution of groups of tasks, and serve as a building
block of parallel computing. Our tool verifies more
barrier synchronisation patterns than the current
state of the art. To improve the scalability of
verification, we introduce a novel event-based
representation of concurrency constraints, and a
graph-based technique for deadlock analysis. The
implementation is distributed and fault-tolerant, and
can verify X10 and Java programs. To formalise the
notion of barrier deadlock, we introduce a core
language expressive enough to represent the three most
widespread barrier synchronisation patterns: group,
split-phase, and dynamic membership. We propose a graph
analysis technique that selects from two alternative
graph representations: the Wait-For Graph, that favours
programs with more tasks than barriers; and the State
Graph, optimised for programs with more barriers than
tasks. We prove that finding a deadlock in either
representation is equivalent, and that the verification
algorithm is sound and complete with respect to the
notion of deadlock in our core language. Armus is
evaluated with three benchmark suites in local and
distributed scenarios. The benchmarks show that graph
analysis with automatic graph-representation selection
can yield a 7-fold speedup in execution versus the
traditional fixed graph representation. The performance
measurements for distributed deadlock detection between
64 processes show negligible overheads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
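
The Wait-For Graph side of the analysis ultimately reduces to cycle detection. Below is a textbook sketch (ours, not Armus's distributed, fault-tolerant implementation): tasks are nodes, an edge u -> v means task u cannot pass its barrier until task v arrives, and a cycle certifies a barrier deadlock.

// Deadlock check by DFS cycle detection on a wait-for graph (textbook
// sketch, not Armus's code). color: 0 = unvisited, 1 = on path, 2 = done.
#include <cstddef>
#include <vector>

bool dfs(int u, const std::vector<std::vector<int>>& g, std::vector<int>& color) {
    color[u] = 1;
    for (int v : g[u]) {
        if (color[v] == 1) return true;          // back edge: cycle found
        if (color[v] == 0 && dfs(v, g, color)) return true;
    }
    color[u] = 2;
    return false;
}

bool has_deadlock(const std::vector<std::vector<int>>& g) {
    std::vector<int> color(g.size(), 0);
    for (std::size_t u = 0; u < g.size(); ++u)
        if (color[u] == 0 && dfs((int)u, g, color)) return true;
    return false;
}
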
@Article{You:2015:VFO,
author = "Yi-Ping You and Hen-Jung Wu and Yeh-Ning Tsai and
Yen-Ting Chao",
title = "{VirtCL}: a framework for {OpenCL} device abstraction
and management",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "161--172",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688505",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "The interest in using multiple graphics processing
units (GPUs) to accelerate applications has increased
in recent years. However, the existing heterogeneous
programming models (e.g., OpenCL) abstract details of
GPU devices at the per-device level and require
programmers to explicitly schedule their kernel tasks
on a system equipped with multiple GPU devices.
Unfortunately, multiple applications running on a
multi-GPU system may compete for some of the GPU
devices while leaving other GPU devices unused.
Moreover, the distributed memory model defined in
OpenCL, where each device has its own memory space,
increases the complexity of managing the memory among
multiple GPU devices. In this article we propose a
framework (called VirtCL) that reduces the programming
burden by acting as a layer between the programmer and
the native OpenCL run-time system for abstracting
multiple devices into a single virtual device and for
scheduling computations and communications among the
multiple devices. VirtCL comprises two main components:
(1) a front-end library, which exposes primary OpenCL
APIs and the virtual device, and (2) a back-end
run-time system (called CLDaemon) for scheduling and
dispatching kernel tasks based on a history-based
scheduler. The front-end library forwards computation
requests to the back-end CLDaemon, which then schedules
and dispatches the requests. We also propose a
history-based scheduler that is able to schedule kernel
tasks in a contention- and communication-aware manner.
Experiments demonstrated that the VirtCL framework
introduced a small overhead (mean of 6\%) but
outperformed the native OpenCL run-time system for most
benchmarks in the Rodinia benchmark suite, which was
due to the abstraction layer eliminating the
time-consuming initialization of OpenCL contexts. We
also evaluated different scheduling policies in VirtCL
with a real-world application (clsurf) and various
synthetic workload traces. The results indicated that
the VirtCL framework provides scalability for multiple
kernel tasks running on multi-GPU systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Ashari:2015:OML,
author = "Arash Ashari and Shirish Tatikonda and Matthias Boehm
and Berthold Reinwald and Keith Campbell and John
Keenleyside and P. Sadayappan",
title = "On optimizing machine learning workloads via kernel
fusion",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "173--182",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688521",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Exploitation of parallel architectures has become
critical to scalable machine learning (ML). Since a
wide range of ML algorithms employ linear algebraic
operators, GPUs with BLAS libraries are a natural
choice for such an exploitation. Two approaches are
commonly pursued: (i) developing specific GPU
accelerated implementations of complete ML algorithms;
and (ii) developing GPU kernels for primitive linear
algebraic operators like matrix-vector multiplication,
which are then used in developing ML algorithms. This
paper extends the latter approach by developing fused
kernels for a combination of primitive operators that
are commonly found in popular ML algorithms. We
identify the generic pattern of computation $\alpha *
X^T (v * (X * y)) + \beta * z$ and its various
instantiations. We develop a fused kernel to optimize
this computation on GPUs --- with specialized
techniques to handle both sparse and dense matrices.
This approach not only reduces the cost of data loads
due to improved temporal locality but also enables
other optimizations like coarsening and hierarchical
aggregation of partial results. We also present an
analytical model that considers input data
characteristics and available GPU resources to estimate
near-optimal settings for kernel launch parameters. The
proposed approach provides speedups ranging from 2 to
67 for different instances of the generic pattern
compared to launching multiple operator-level kernels
using GPU accelerated libraries. We conclude by
demonstrating the effectiveness of the approach in
improving end-to-end performance on an entire ML
algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
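
The fusion opportunity comes from the pattern factoring through the rows of X: X^T (v * (X y)) equals the sum over rows x_i of v_i (x_i . y) x_i. The dense CPU sketch below is ours (the paper builds GPU kernels and also covers sparse matrices); it evaluates the whole pattern in a single pass over X instead of chaining separate matrix-vector kernels.

// One-pass fused evaluation of alpha * X^T (v .* (X y)) + beta * z for a
// dense row-major m x n matrix X (our CPU sketch of the fusion idea, not the
// paper's GPU kernel). Each row is read once; no m-length temporary exists.
#include <cstddef>
#include <vector>

std::vector<double> fused(const std::vector<double>& X,
                          std::size_t m, std::size_t n,
                          const std::vector<double>& v,   // length m
                          const std::vector<double>& y,   // length n
                          const std::vector<double>& z,   // length n
                          double alpha, double beta) {
    std::vector<double> r(n);
    for (std::size_t j = 0; j < n; ++j) r[j] = beta * z[j];
    for (std::size_t i = 0; i < m; ++i) {
        const double* xi = &X[i * n];
        double s = 0.0;                        // s = x_i . y
        for (std::size_t j = 0; j < n; ++j) s += xi[j] * y[j];
        const double c = alpha * v[i] * s;     // row i's coefficient
        for (std::size_t j = 0; j < n; ++j) r[j] += c * xi[j];
    }
    return r;
}
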
@Article{Zhang:2015:NAG,
author = "Kaiyuan Zhang and Rong Chen and Haibo Chen",
title = "{NUMA}-aware graph-structured analytics",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "183--193",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688507",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graph-structured analytics has been widely adopted in
a number of big data applications such as social
computation, web-search and recommendation systems.
Though much prior research focuses on scaling
graph analytics in distributed environments, the strong
desire for performance per core, dollar, and joule has
generated considerable interest in processing
large-scale graphs on a single server-class machine,
which may have several terabytes of RAM and 80 or more
cores. However, prior graph-analytics systems are
largely neutral to NUMA characteristics and thus have
suboptimal performance. This paper presents a detailed
study of NUMA characteristics and their impact on the
efficiency of graph-analytics. Our study uncovers two
insights: (1) either random or interleaved allocation of
graph data will significantly hamper data locality and
parallelism; (2) sequential inter-node (i.e., remote)
memory accesses have much higher bandwidth than both
intra- and inter-node random ones. Based on them, this
paper describes Polymer, a NUMA-aware graph-analytics
system on multicore with two key design decisions.
First, Polymer differentially allocates and places
topology data, application-defined data and mutable
runtime states of a graph system according to their
access patterns to minimize remote accesses. Second,
for some remaining random accesses, Polymer carefully
converts random remote accesses into sequential remote
accesses, by using lightweight replication of vertices
across NUMA nodes. To improve load balance and vertex
convergence, Polymer is further built with a
hierarchical barrier to boost parallelism and locality,
an edge-oriented balanced partitioning for skewed
graphs, and adaptive data structures according to the
proportion of active vertices. A detailed evaluation on
an 80-core machine shows that Polymer often outperforms
the state-of-the-art single-machine graph-analytics
systems, including Ligra, X-Stream and Galois, for a
set of popular real-world and synthetic graphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Xie:2015:SAT,
author = "Chenning Xie and Rong Chen and Haibing Guan and Binyu
Zang and Haibo Chen",
title = "{SYNC} or {ASYNC}: time to fuse for distributed
graph-parallel computation",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "194--204",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688508",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large-scale graph-structured computation usually
exhibits an iterative, convergence-oriented computing
nature, where input data is computed iteratively until
a convergence condition is reached. Such features have
led to the development of two different computation
modes for graph-structured programs, namely synchronous
(Sync) and asynchronous (Async) modes. Unfortunately,
there is currently no in-depth study on their execution
properties and thus programmers have to manually choose
a mode, either requiring a deep understanding of
underlying graph engines, or suffering from suboptimal
performance. This paper makes the first comprehensive
characterization on the performance of the two modes on
a set of typical graph-parallel applications. Our study
shows that the performance of the two modes varies
significantly with different graph algorithms,
partitioning methods, execution stages, input graphs
and cluster scales, and no single mode consistently
outperforms the other. To this end, this paper proposes
Hsync, a hybrid graph computation mode that adaptively
switches a graph-parallel program between the two modes
for optimal performance. Hsync constantly collects
execution statistics on-the-fly and leverages a set of
heuristics to predict future performance and determine
when a mode switch could be profitable. We have built
online sampling and offline profiling approaches,
combined with a set of heuristics, to accurately
predict future performance in the two modes. A
prototype called PowerSwitch has been built based on
PowerGraph, a state-of-the-art distributed
graph-parallel system, to support adaptive execution of
graph algorithms. On a 48-node EC2-like cluster,
PowerSwitch consistently outperforms the best of both
modes, with a speedup ranging from 9\% to 73\% due to
timely switch between two modes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Tang:2015:COW,
author = "Yuan Tang and Ronghui You and Haibin Kan and Jesmin
Jahan Tithi and Pramod Ganapathi and Rezaul A.
Chowdhury",
title = "Cache-oblivious wavefront: improving parallelism of
recursive dynamic programming algorithms without losing
cache-efficiency",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "205--214",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688514",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "State-of-the-art cache-oblivious parallel algorithms
for dynamic programming (DP) problems usually guarantee
asymptotically optimal cache performance without any
tuning of cache parameters, but they often fail to
exploit the theoretically best parallelism at the same
time. While these algorithms achieve cache-optimality
through the use of a recursive divide-and-conquer (DAC)
strategy, scheduling tasks at the granularity of task
dependency introduces artificial dependencies in
addition to those arising from the defining recurrence
equations. We removed the artificial dependency by
scheduling tasks ready for execution as soon as all its
real dependency constraints are satisfied, while
preserving the cache-optimality by inheriting the DAC
strategy. We applied our approach to a set of widely
known dynamic programming problems, such as
Floyd-Warshall's All-Pairs Shortest Paths, Stencil, and
LCS. Theoretical analyses show that our techniques
improve the span of 2-way DAC-based Floyd--Warshall's
algorithm on an $n$-node graph from $\Theta(n \log^2
n)$ to $\Theta(n)$, stencil computations on a
$d$-dimensional hypercubic grid of width $w$ for $h$
time steps from $\Theta((d^2 h) w^{1 / (d + 2)})$ to
$\Theta(h)$, and LCS on two sequences of length $n$
each from $\Theta(n^{\log_2 3})$ to $\Theta(n)$. In
each case, the total work and cache complexity remain
asymptotically optimal. Experimental measurements
exhibit a $3$--$5$ times improvement in absolute
running time, a $10$--$20$ times improvement in
burdened span reported by Cilkview, and approximately
the same L1/L2 cache misses reported by PAPI.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
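
The LCS figures, for instance, follow from standard span recurrences (a textbook derivation consistent with the abstract, not the paper's full analysis). In the 2-way divide-and-conquer scheme the two off-diagonal quadrants of the DP table run in parallel between the two corner quadrants, so three half-size subproblems sit on the critical path, whereas wavefront scheduling pays only for the table's anti-diagonals:

% 2-way DAC LCS: Q_{11}, then Q_{12} \parallel Q_{21}, then Q_{22}:
T_\infty(n) = 3\,T_\infty(n/2) + \Theta(1) \;\Longrightarrow\; T_\infty(n) = \Theta(n^{\log_2 3}).
% Wavefront scheduling: only the 2n - 1 anti-diagonals are sequential:
T_\infty(n) = \Theta(n).
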
@Article{Chabbi:2015:HPL,
author = "Milind Chabbi and Michael Fagan and John
Mellor-Crummey",
title = "High performance locks for multi-level {NUMA}
systems",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "215--226",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688503",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Efficient locking mechanisms are critically important
for high performance computers. On highly-threaded
systems with a deep memory hierarchy, the throughput of
traditional queueing locks, e.g., MCS locks, falls off
due to NUMA effects. Two-level cohort locks perform
better on NUMA systems, but fail to deliver top
performance for deep NUMA hierarchies. In this paper,
we describe a hierarchical variant of the MCS lock that
adapts the principles of cohort locking for
architectures with deep NUMA hierarchies. We describe
analytical models for throughput and fairness of
Cohort-MCS (C-MCS) and Hierarchical MCS (HMCS) locks
that enable us to tailor these locks for high
performance on any target platform without empirical
tuning. Using these models, one can select parameters
such that an HMCS lock will deliver better fairness
than a C-MCS lock for a given throughput, or deliver
better throughput for a given fairness. Our experiments
show that, under high contention, a three-level HMCS
lock delivers up to 7.6x higher lock throughput than a
C-MCS lock on a 128-thread IBM Power 755 and a
five-level HMCS lock delivers up to 72x higher lock
throughput on a 4096-thread SGI UV 1000. On the K-means
clustering code from the MineBench suite, a three-level
HMCS lock reduces the running time by up to 55\%
compared to the C-MCS lock on an IBM Power 755.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
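
For reference, the base MCS queueing lock that HMCS instantiates at each level of the NUMA hierarchy is sketched below with C++11 atomics (our rendering of the classic Mellor-Crummey and Scott algorithm, not the paper's code). Each thread spins only on its own queue node, so the cache line it spins on stays local; the hierarchical variant exploits that property level by level.

// Classic MCS queueing lock (textbook algorithm, our sketch).
#include <atomic>

struct McsLock {
    struct Node {
        std::atomic<Node*> next{nullptr};
        std::atomic<bool>  locked{false};
    };
    std::atomic<Node*> tail{nullptr};

    void lock(Node* me) {
        me->next.store(nullptr, std::memory_order_relaxed);
        Node* prev = tail.exchange(me, std::memory_order_acq_rel);
        if (prev) {                              // someone ahead of us
            me->locked.store(true, std::memory_order_relaxed);
            prev->next.store(me, std::memory_order_release);
            while (me->locked.load(std::memory_order_acquire)) { /* local spin */ }
        }
    }

    void unlock(Node* me) {
        Node* succ = me->next.load(std::memory_order_acquire);
        if (!succ) {
            Node* expected = me;                 // try to close the queue
            if (tail.compare_exchange_strong(expected, nullptr,
                                             std::memory_order_acq_rel))
                return;
            while (!(succ = me->next.load(std::memory_order_acquire))) { }
        }
        succ->locked.store(false, std::memory_order_release);  // hand off
    }
};
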
@Article{Majo:2015:LPC,
author = "Zoltan Majo and Thomas R. Gross",
title = "A library for portable and composable data locality
optimizations for {NUMA} systems",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "227--238",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688509",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many recent multiprocessor systems are realized with a
non-uniform memory architecture (NUMA) and accesses to
remote memory locations take more time than local
memory accesses. Optimizing NUMA memory system
performance is difficult and costly for three principal
reasons: (1) today's programming languages/libraries
have no explicit support for NUMA systems, (2) NUMA
optimizations are not~portable, and (3) optimizations
are not~composable (i.e., they can become ineffective
or worsen performance in environments that support
composable parallel software). This paper presents
TBB-NUMA, a parallel programming library based on Intel
Threading Building Blocks (TBB) that supports portable
and composable NUMA-aware programming. TBB-NUMA
provides a model of task affinity that captures a
programmer's insights on mapping tasks to resources.
NUMA-awareness affects all layers of the library (i.e.,
resource management, task scheduling, and high-level
parallel algorithm templates) and requires close
coupling between all these layers. Optimizations
implemented with TBB-NUMA (for a set of standard
benchmark programs) result in up to 44\% performance
improvement over standard TBB, but more important,
optimized programs are portable across different NUMA
architectures and preserve data locality also when
composed with other parallel computations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Amer:2015:MRC,
author = "Abdelhalim Amer and Huiwei Lu and Yanjie Wei and Pavan
Balaji and Satoshi Matsuoka",
title = "{MPI+Threads}: runtime contention and remedies",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "239--248",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hybrid MPI+Threads programming has emerged as an
alternative model to the ``MPI everywhere'' model to
better handle the increasing core density in cluster
nodes. While the MPI standard allows multithreaded
concurrent communication, such flexibility comes with
the cost of maintaining thread safety within the MPI
implementation, typically implemented using critical
sections. In contrast to previous works that studied
the importance of critical-section granularity in MPI
implementations, in this paper we investigate the
implication of critical-section arbitration on
communication performance. We first analyze the MPI
runtime when multithreaded concurrent communication
takes place on hierarchical memory systems. Our results
indicate that the mutex-based approach that most MPI
implementations use today can incur performance
penalties due to unfair arbitration. We then present
methods to mitigate these penalties with a first-come,
first-served arbitration and a priority locking scheme
that favors threads doing useful work. Through
evaluations using several benchmarks and applications,
we demonstrate up to 5-fold improvement in
performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
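
The first-come, first-served arbitration advocated above is exactly what a ticket lock provides, in contrast to a plain mutex whose wakeup order is unspecified and can starve threads under contention. A minimal generic sketch (ours, not code from any MPI implementation):

// Minimal ticket lock: strictly FIFO acquisition order (generic sketch).
#include <atomic>
#include <thread>

struct TicketLock {
    std::atomic<unsigned> next_ticket{0};
    std::atomic<unsigned> now_serving{0};

    void lock() {
        unsigned my = next_ticket.fetch_add(1, std::memory_order_relaxed);
        while (now_serving.load(std::memory_order_acquire) != my)
            std::this_thread::yield();           // wait for our turn
    }
    void unlock() {
        // Only the holder writes now_serving, so a plain increment is safe.
        now_serving.store(now_serving.load(std::memory_order_relaxed) + 1,
                          std::memory_order_release);
    }
};
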
@Article{McPherson:2015:FPL,
author = "Andrew J. McPherson and Vijay Nagarajan and Susmit
Sarkar and Marcelo Cintra",
title = "Fence placement for legacy data-race-free programs via
synchronization read detection",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "249--250",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688524",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Fence placement is required to ensure legacy parallel
programs operate correctly on relaxed architectures.
The challenge is to place as few fences as possible
without compromising correctness. By identifying
necessary conditions for a read to be an acquire we
improve upon the state of the art for legacy DRF
programs by up to 2.64x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Piao:2015:JJF,
author = "Xianglan Piao and Channoh Kim and Younghwan Oh and
Huiying Li and Jincheon Kim and Hanjun Kim and Jae W.
Lee",
title = "{JAWS}: a {JavaScript} framework for adaptive
{CPU--GPU} work sharing",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "251--252",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688525",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces jAWS, a JavaScript framework for
adaptive work sharing between CPU and GPU for
data-parallel workloads. Unlike conventional
heterogeneous parallel programming environments for
JavaScript, which use only one compute device when
executing a single kernel, jAWS accelerates kernel
execution by exploiting both devices to realize the full
performance potential of heterogeneous multicores. jAWS
employs an efficient work partitioning algorithm that
finds an optimal work distribution between the two
devices without requiring offline profiling. The jAWS
runtime provides shared arrays for multiple parallel
contexts, hence eliminating extra copy overhead for
input and output data. Our preliminary evaluation with
both CPU-friendly and GPU-friendly benchmarks
demonstrates that jAWS provides good load balancing and
efficient data communication between parallel contexts,
to significantly outperform best single-device
execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Seo:2015:GGS,
author = "Hyunseok Seo and Jinwook Kim and Min-Soo Kim",
title = "{GStream}: a graph streaming processing method for
large-scale graphs on {GPUs}",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "253--254",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688526",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Fast processing of graph algorithms for large-scale
graphs is becoming increasingly important. There have
also been many attempts to process graph applications
by exploiting the massive parallelism of GPUs.
However, most of the existing methods fail to process
large-scale graphs that do not fit in GPU device
memory. We propose a fast and scalable parallel
processing method, GStream, that fully exploits the
computational power of GPUs for processing large-scale
graphs (e.g., billions of vertices) very efficiently. It
exploits the concept of nested-loop theta-join and
multiple asynchronous GPU streams. Extensive
experimental results show that GStream consistently and
significantly outperforms the state-of-the-art
method.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Al-Saber:2015:SSA,
author = "Nabeel Al-Saber and Milind Kulkarni",
title = "{SemCache++}: semantics-aware caching for efficient
multi-{GPU} offloading",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "255--256",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688527",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Offloading computations to multiple GPUs is not an
easy task. It requires decomposing data, distributing
computations and handling communication manually.
Drop-in GPU libraries have made it easy to offload
computations to multiple GPUs by hiding this complexity
inside library calls. Such encapsulation prevents the
                 reuse of data between successive kernel invocations,
                 resulting in redundant communication. This limitation
exists in multi-GPU libraries like CUBLASXT. In this
paper, we introduce SemCache++, a semantics-aware GPU
cache that automatically manages communication between
the CPU and multiple GPUs in addition to optimizing
communication by eliminating redundant transfers using
caching. SemCache++ is used to build the first
                 multi-GPU drop-in replacement library that (a) uses
                 virtual memory to automatically manage and optimize
multi-GPU communication and (b) requires no program
rewriting or annotations. Our caching technique is
                 efficient; it uses a two-level caching directory to
track matrices and sub-matrices. Experimental results
show that our system can eliminate redundant
communication and deliver significant performance
improvements over multi-GPU libraries like CUBLASXT.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Kim:2015:OBU,
author = "Jungwon Kim and Seyong Lee and Jeffrey S. Vetter",
title = "An {OpenACC}-based unified programming model for
multi-accelerator systems",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "257--258",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper proposes a novel SPMD programming model of
OpenACC. Our model integrates the different
granularities of parallelism from vector-level
parallelism to node-level parallelism into a single,
unified model based on OpenACC. It allows programmers
to write programs for multiple accelerators using a
uniform programming model whether they are in shared or
distributed memory systems. We implement a prototype of
our model and evaluate its performance with a GPU-based
supercomputer using three benchmark applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Thomson:2015:LHB,
author = "Paul Thomson and Alastair F. Donaldson",
title = "The lazy happens-before relation: better partial-order
reduction for systematic concurrency testing",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "259--260",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688533",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the lazy happens-before relation (lazy
HBR), which ignores mutex-induced edges to provide a
more precise notion of state equivalence compared with
the traditional happens-before relation. We demonstrate
experimentally that the lazy HBR has the potential to
provide greater schedule reduction during systematic
concurrency testing with respect to a set of 79 Java
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Haidar:2015:TBL,
author = "Azzam Haidar and Tingxing Dong and Piotr Luszczek and
Stanimire Tomov and Jack Dongarra",
title = "Towards batched linear solvers on accelerated hardware
platforms",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "261--262",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As hardware evolves, an increasingly effective
approach to develop energy efficient, high-performance
solvers, is to design them to work on many small and
independent problems. Indeed, many applications already
need this functionality, especially for GPUs, which are
known to be currently about four to five times more
energy efficient than multicore CPUs for every
floating-point operation. In this paper, we describe
the development of the main one-sided factorizations:
LU, QR, and Cholesky; that are needed for a set of
small dense matrices to work in parallel. We refer to
such algorithms as batched factorizations. Our approach
is based on representing the algorithms as a sequence
of batched BLAS routines for GPU-contained execution.
Note that this is similar in functionality to the
LAPACK and the hybrid MAGMA algorithms for large-matrix
factorizations. But it is different from a
straightforward approach, whereby each of GPU's
symmetric multiprocessors factorizes a single problem
at a time. We illustrate how our performance analysis
together with the profiling and tracing tools guided
the development of batched factorizations to achieve up
to 2-fold speedup and 3-fold better energy efficiency
compared to our highly optimized batched CPU
implementations based on the MKL library on a
two-sockets, Intel Sandy Bridge server. Compared to a
batched LU factorization featured in the NVIDIA's
CUBLAS library for GPUs, we achieves up to 2.5-fold
speedup on the K40 GPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Muralidharan:2015:COP,
author = "Saurav Muralidharan and Michael Garland and Bryan
Catanzaro and Albert Sidelnik and Mary Hall",
title = "A collection-oriented programming model for
performance portability",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "263--264",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688537",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes Surge, a collection-oriented
programming model that enables programmers to compose
parallel computations using nested high-level data
collections and operators. Surge exposes a code
generation interface, decoupled from the core
computation, that enables programmers and autotuners to
easily generate multiple implementations of the same
computation on various parallel architectures such as
multi-core CPUs and GPUs. By decoupling computations
from architecture-specific implementation, programmers
can target multiple architectures more easily, and
generate a search space that facilitates optimization
                 and customization for specific architectures. We
                 express four real-world benchmarks in Surge, from
                 domains such as sparse linear algebra and machine
                 learning, and from the same performance-portable
                 specification generate OpenMP and CUDA C++
                 implementations. Surge generates efficient, scalable
                 code that achieves up to 1.32x speedup over
                 handcrafted, well-optimized CUDA code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Wang:2015:GHP,
author = "Yangzihao Wang and Andrew Davidson and Yuechao Pan and
Yuduo Wu and Andy Riffel and John D. Owens",
title = "{Gunrock}: a high-performance graph processing library
on the {GPU}",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "265--266",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688538",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For large-scale graph analytics on the GPU, the
irregularity of data access and control flow and the
complexity of programming GPUs have been two
significant challenges for developing a programmable
high-performance graph library. ``Gunrock'', our
graph-processing system, uses a high-level
bulk-synchronous abstraction with traversal and
computation steps, designed specifically for the GPU.
Gunrock couples high performance with a high-level
programming model that allows programmers to quickly
develop new graph primitives with less than 300 lines
of code. We evaluate Gunrock on five graph primitives
and show that Gunrock has at least an order of
magnitude speedup over Boost and PowerGraph, comparable
performance to the fastest GPU hardwired primitives,
and better performance than any other GPU high-level
graph library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Pearce:2015:DLB,
author = "Olga Pearce and Todd Gamblin and Bronis R. de Supinski
and Martin Schulz and Nancy M. Amato",
title = "Decoupled load balancing",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "267--268",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern scientific simulations divide work between
parallel processors by decomposing a spatial domain of
mesh cells, particles, or other elements. A balanced
assignment of the computational load is critical for
parallel performance. If the computation per element
changes over the simulation time, simulations can use
dynamic load balance algorithms to evenly redistribute
                 work to processes. Graph partitioners are widely used
                 and balance very effectively, but they do not
                 strong-scale well. Typical SPMD simulations wait while
                 a load
balance algorithm runs on all processors, so a poorly
scaling algorithm can itself become a bottleneck. We
observe that the load balance algorithm is separate
from the main application computation and has its own
scaling properties. We propose to decouple the load
balance algorithm from the application, and to offload
the load balance computation so that it runs
concurrently with the application on a smaller number
of processors. We demonstrate the costs of decoupling
and offloading the load balancing algorithm from a
Barnes--Hut application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Jin:2015:CPI,
author = "Ye Jin and Mingliang Liu and Xiaosong Ma and Qing Liu
and Jeremy Logan and Norbert Podhorszki and Jong Youl
Choi and Scott Klasky",
title = "Combining phase identification and statistic modeling
for automated parallel benchmark generation",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "269--270",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688541",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parallel application benchmarks are indispensable for
evaluating/optimizing HPC software and hardware.
However, it is very challenging and costly to obtain
high-fidelity benchmarks reflecting the scale and
complexity of state-of-the-art parallel applications.
Hand-extracted synthetic benchmarks are time- and
                 labor-intensive to create. Real applications
                 themselves, while offering the most accurate
                 performance evaluation, are expensive to compile,
                 port, and reconfigure, and are often plainly
                 inaccessible due to
security or ownership concerns. This work contributes
APPRIME, a novel tool for trace-based automatic
parallel benchmark generation. Taking as input standard
communication-I/O traces of an application's execution,
it couples accurate automatic phase identification with
statistical regeneration of event parameters to create
compact, portable, and to some degree reconfigurable
parallel application benchmarks. Experiments with four
NAS Parallel Benchmarks (NPB) and three real scientific
simulation codes confirm the fidelity of APPRIME
benchmarks. They retain the original applications'
performance characteristics, in particular the relative
performance across platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Shi:2015:OAG,
author = "Xuanhua Shi and Junling Liang and Sheng Di and
Bingsheng He and Hai Jin and Lu Lu and Zhixiang Wang
and Xuan Luo and Jianlong Zhong",
title = "Optimization of asynchronous graph processing on {GPU}
with hybrid coloring model",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "271--272",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688542",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern GPUs have been widely used to accelerate the
graph processing for complicated computational problems
regarding graph theory. Many parallel graph algorithms
adopt the asynchronous computing model to accelerate
the iterative convergence. Unfortunately, the
consistent asynchronous computing requires locking or
the atomic operations, leading to significant
penalties/overheads when implemented on GPUs. To this
end, coloring algorithm is adopted to separate the
vertices with potential updating conflicts,
guaranteeing the consistency/correctness of the
parallel processing. We propose a light-weight
asynchronous processing framework called Frog with a
hybrid coloring model. We find that majority of
vertices (about 80\%) are colored with only a few
colors, such that they can be read and updated in a
very high degree of parallelism without violating the
sequential consistency. Accordingly, our solution will
separate the processing of the vertices based on the
distribution of colors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{West:2015:ERO,
author = "Scott West and Sebastian Nanz and Bertrand Meyer",
title = "Efficient and reasonable object-oriented concurrency",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "273--274",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688545",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Making threaded programs safe and easy to reason about
is one of the chief difficulties in modern programming.
This work provides an efficient execution model and
implementation for SCOOP, a concurrency approach that
provides not only data-race freedom but also
pre/postcondition reasoning guarantees between threads.
The extensions we propose influence the underlying
semantics to increase the amount of concurrent
execution that is possible, exclude certain classes of
deadlocks, and enable greater performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Vassiliadis:2015:PMR,
author = "Vassilis Vassiliadis and Konstantinos Parasyris and
Charalambos Chalios and Christos D. Antonopoulos and
Spyros Lalis and Nikolaos Bellas and Hans
Vandierendonck and Dimitrios S. Nikolopoulos",
title = "A programming model and runtime system for
significance-aware energy-efficient computing",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "275--276",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688546",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a task-based programming model and
runtime system that exploit the observation that not
all parts of a program are equally significant for the
accuracy of the end-result, in order to trade off the
quality of program outputs for increased
energy-efficiency. This is done in a structured and
flexible way, allowing for easy exploitation of
different points in the quality/energy space, without
adversely affecting application performance. The
runtime system can apply a number of different policies
to decide whether it will execute less-significant
tasks accurately or approximately. The experimental
evaluation indicates that our system can achieve an
energy reduction of up to 83\% compared with a fully
accurate execution and up to 35\% compared with an
approximate version employing loop perforation. At the
same time, our approach always results in graceful
quality degradation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Wimmer:2015:LFK,
author = "Martin Wimmer and Jakob Gruber and Jesper Larsson
Tr{\"a}ff and Philippas Tsigas",
title = "The lock-free {$k$-LSM} relaxed priority queue",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "277--278",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688547",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new, concurrent, lock-free priority queue
that relaxes the delete-min operation to allow deletion
                 of any of the $\rho$ smallest keys instead of only a
                 minimal one, where $\rho$ is a parameter that can be
configured at runtime. It is built from a logarithmic
number of sorted arrays, similar to log-structured
merge-trees (LSM). For keys added and removed by the
same thread the behavior is identical to a non-relaxed
priority queue. We compare to state-of-the-art
lock-free priority queues with both relaxed and
non-relaxed semantics, showing high performance and
good scalability of our approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Saillard:2015:SDV,
author = "Emmanuelle Saillard and Patrick Carribault and Denis
Barthou",
title = "Static\slash dynamic validation of {MPI} collective
communications in multi-threaded context",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "279--280",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688548",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scientific applications mainly rely on the MPI
parallel programming model to reach high performance on
                 supercomputers. The advent of manycore architectures
                 (a larger number of cores and a lower amount of
                 memory per core) leads to mixing MPI with a
                 thread-based model like OpenMP. But integrating two
                 different programming models inside the same
                 application can be tricky and generate complex bugs.
                 Thus, the correctness of hybrid programs requires
                 special care regarding the location of MPI calls. For
                 example, identical MPI collective operations cannot
                 be performed by multiple non-synchronized threads. To
                 tackle this issue, this paper proposes a static
                 analysis and a reduced dynamic instrumentation to
                 detect bugs related to misuse of MPI collective
                 operations inside or outside threaded regions. This
                 work extends PARCOACH, designed for MPI-only
                 applications, and keeps compatibility with its
                 algorithms. We validated our method on multiple
                 hybrid benchmarks and applications with low
                 overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Ramachandran:2015:CFC,
author = "Arunmoezhi Ramachandran and Neeraj Mittal",
title = "{CASTLE}: fast concurrent internal binary search tree
using edge-based locking",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "281--282",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688551",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new lock-based algorithm for concurrent
manipulation of a binary search tree in an asynchronous
shared memory system that supports search, insert and
delete operations. Some of the desirable
characteristics of our algorithm are: (i) a search
operation uses only read and write instructions, (ii)
an insert operation does not acquire any locks, and
(iii) a delete operation only needs to lock up to four
edges in the absence of contention. Our algorithm is
based on an internal representation of a search tree
                 and operates at the edge level (locking edges) rather
                 than at the node level (locking nodes); this
                 minimizes the
contention window of a write operation and improves the
system throughput. Our experiments indicate that our
lock-based algorithm outperforms existing algorithms
for a concurrent binary search tree for medium-sized
and larger trees, achieving up to 59\% higher
throughput than the next best algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Das:2015:SBP,
author = "Madan Das and Gabriel Southern and Jose Renau",
title = "Section based program analysis to reduce overhead of
detecting unsynchronized thread communication",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "283--284",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688552",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose Section Based Program Analysis (SBPA), a
novel way to decompose programs into disjoint sections
to identify non-communicating loads and stores during
                 program compilation. We implemented SBPA for a
                 deterministic execution runtime environment and
                 reduced dynamic memory access instrumentation by
                 63\%. We also integrated SBPA with ThreadSanitizer,
                 achieving a geometric-mean speed-up of 2.74.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Harshvardhan:2015:HAR,
author = "Harshvardhan and Nancy M. Amato and Lawrence
Rauchwerger",
title = "A hierarchical approach to reducing communication in
parallel graph algorithms",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "285--286",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2700994",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large-scale graph computing has become critical due to
the ever-increasing size of data. However, distributed
graph computations are limited in their scalability and
performance due to the heavy communication inherent in
such computations. This is exacerbated in scale-free
networks, such as social and web graphs, which contain
hub vertices that have large degrees and therefore send
a large number of messages over the network.
Furthermore, many graph algorithms and computations
send the same data to each of the neighbors of a
vertex. Our proposed approach recognizes this, and
reduces communication performed by the algorithm
                 without changes to user code, through a hierarchical
machine model imposed upon the input graph. The
hierarchical model takes advantage of locale
information of the neighboring vertices to reduce
communication, both in message volume and total number
of bytes sent. It is also able to better exploit the
machine hierarchy to further reduce the communication
costs, by aggregating traffic between different levels
                 of the machine hierarchy. Results of an implementation
                 in the STAPL GL show improved scalability and
                 performance over the traditional level-synchronous
                 approach, with $2.5 \times$--$8 \times$ improvement
                 for a variety of graph algorithms at 12,000+ cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Chen:2015:TNL,
author = "Yifeng Chen and Xiang Cui and Hong Mei",
title = "{Tiles}: a new language mechanism for heterogeneous
parallelism",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "287--288",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688555",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper studies the essence of heterogeneity from
the perspective of language mechanism design. The
proposed mechanism, called tiles, is a program
construct that bridges two relative levels of
computation: an outer level of source data in larger,
slower or more distributed memory and an inner level of
data blocks in smaller, faster or more localized
memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Radoi:2015:WAR,
author = "Cosmin Radoi and Stephan Herhut and Jaswanth Sreeram
and Danny Dig",
title = "Are web applications ready for parallelism?",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "289--290",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2700995",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In recent years, web applications have become
pervasive. Their backbone is JavaScript, the only
programming language supported by all major web
browsers. Most browsers run on desktop or mobile
devices with parallel hardware. However, JavaScript is
by design sequential, and current web applications make
little use of hardware parallelism. Are web
applications ready to exploit parallel hardware? We
answer the question in two steps: First, we survey 174
web developers about the potential and challenges of
using parallelism. Then, we study the performance and
                 computation shape of a set of web applications that
                 are representative of the emerging web. Our findings
indicate that emerging web applications do have latent
data parallelism, and JavaScript developers'
programming style is not a significant impediment to
exploiting this parallelism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Bodik:2015:PSO,
author = "Rastislav Bodik",
title = "Program synthesis: opportunities for the next decade",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "1--1",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2789052",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Program synthesis is the contemporary answer to
automatic programming. It innovates in two ways: First,
it replaces batch automation with interactivity,
assisting the programmer in refining the understanding
of the programming problem. Second, it produces
programs using search in a candidate space rather than
by derivation from a specification. Searching for an
acceptable program means that we can accommodate
incomplete specifications, such as examples.
Additionally, search makes synthesis applicable to
domains that lack correct-by-construction derivation
rules, such as hardware design, education, end-user
programming, and systems biology. The future of
synthesis rests on four challenges, each presenting an
opportunity to develop novel abstractions for
``programming with search.'' Larger scope: today, we
synthesize small, flat programs; synthesis of large
software will need constructs for modularity and
stepwise refinement. New interaction modes: to solicit
the specification without simply asking for more
examples, we need to impose a structure on the
candidate space and explore it in a dialogue.
Construction: how to compile a synthesis problem to a
search algorithm without building a compiler?
Everything is a program: whatever can be phrased as a
program can be in principle synthesized. Indeed, we
will see synthesis advance from synthesis of plain
programs to synthesis of compilers and languages. The
latter may include DSLs, type systems, and modeling
languages for biology. As such, synthesis could help
mechanize the crown jewel of programming languages
research --- the design of abstractions --- which has
so far been done manually and only by experts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Rompf:2015:FPS,
author = "Tiark Rompf and Nada Amin",
title = "Functional pearl: a {SQL} to {C} compiler in 500 lines
of code",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "2--9",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784760",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the design and implementation of a SQL
query processor that outperforms existing database
systems and is written in just about 500 lines of Scala
code --- a convincing case study that high-level
functional programming can handily beat C for
systems-level programming where the last drop of
performance matters. The key enabler is a shift in
perspective towards generative programming. The core of
the query engine is an interpreter for relational
algebra operations, written in Scala. Using the
open-source LMS Framework (Lightweight Modular
Staging), we turn this interpreter into a query
compiler with very low effort. To do so, we capitalize
on an old and widely known result from partial
evaluation known as Futamura projections, which state
that a program that can specialize an interpreter to
any given input program is equivalent to a compiler. In
this pearl, we discuss LMS programming patterns such as
mixed-stage data structures (e.g. data records with
static schema and dynamic field components) and
techniques to generate low-level C code, including
specialized data structures and data loading
primitives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Chlipala:2015:OCP,
author = "Adam Chlipala",
title = "An optimizing compiler for a purely functional
web-application language",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "10--21",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784741",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "High-level scripting languages have become
tremendously popular for development of dynamic Web
applications. Many programmers appreciate the
productivity benefits of automatic storage management,
freedom from verbose type annotations, and so on. While
it is often possible to improve performance
substantially by rewriting an application in C or a
similar language, very few programmers bother to do so,
because of the consequences for human development
effort. This paper describes a compiler that makes it
possible to have most of the best of both worlds,
coding Web applications in a high-level language but
compiling to native code with performance comparable to
handwritten C code. The source language is Ur/Web, a
domain-specific, purely functional, statically typed
language for the Web. Through a coordinated suite of
relatively straightforward program analyses and
algebraic optimizations, we transform Ur/Web programs
into almost-idiomatic C code, with no garbage
collection, little unnecessary memory allocation for
intermediate values, etc. Our compiler is in production
use for commercial Web sites supporting thousands of
users, and microbenchmarks demonstrate very competitive
performance versus mainstream tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Bauman:2015:PTJ,
author = "Spenser Bauman and Carl Friedrich Bolz and Robert
Hirschfeld and Vasily Kirilichev and Tobias Pape and
Jeremy G. Siek and Sam Tobin-Hochstadt",
title = "{Pycket}: a tracing {JIT} for a functional language",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "22--34",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784740",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Pycket, a high-performance tracing JIT
compiler for Racket. Pycket supports a wide variety of
the sophisticated features in Racket such as contracts,
continuations, classes, structures, dynamic binding,
and more. On average, over a standard suite of
benchmarks, Pycket outperforms existing compilers, both
Racket's JIT and other highly-optimizing Scheme
compilers. Further, Pycket provides much better
performance for Racket proxies than existing systems,
dramatically reducing the overhead of contracts and
gradual typing. We validate this claim with performance
evaluation on multiple existing benchmark suites. The
Pycket implementation is of independent interest as an
application of the RPython meta-tracing framework
(originally created for PyPy), which automatically
generates tracing JIT compilers from interpreters.
Prior work on meta-tracing focuses on bytecode
interpreters, whereas Pycket is a high-level
interpreter based on the CEK abstract machine and
operates directly on abstract syntax trees. Pycket
supports proper tail calls and first-class
continuations. In the setting of a functional language,
where recursion and higher-order functions are more
prevalent than explicit loops, the most significant
performance challenge for a tracing JIT is identifying
which control flows constitute a loop---we discuss two
strategies for identifying loops and measure their
impact.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Rossberg:2015:CMU,
author = "Andreas Rossberg",
title = "{1ML} --- core and modules united ({$F$}-ing
first-class modules)",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "35--47",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784738",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "ML is two languages in one: there is the core, with
types and expressions, and there are modules, with
signatures, structures and functors. Modules form a
separate, higher-order functional language on top of
the core. There are both practical and technical
reasons for this stratification; yet, it creates
substantial duplication in syntax and semantics, and it
reduces expressiveness. For example, selecting a module
cannot be made a dynamic decision. Language extensions
allowing modules to be packaged up as first-class
values have been proposed and implemented in different
variations. However, they remedy expressiveness only to
some extent, are syntactically cumbersome, and do not
alleviate redundancy. We propose a redesign of ML in
which modules are truly first-class values, and core
and module layer are unified into one language. In this
``1ML'', functions, functors, and even type
constructors are one and the same construct; likewise,
no distinction is made between structures, records, or
tuples. Or viewed the other way round, everything is
just (``a mode of use of'') modules. Yet, 1ML does not
require dependent types, and its type structure is
                 expressible in terms of plain System $F_\omega$, in a
minor variation of our F-ing modules approach. We
introduce both an explicitly typed version of 1ML, and
an extension with Damas/Milner-style implicit
quantification. Type inference for this language is not
complete, but, we argue, not substantially worse than
for Standard ML. An alternative view is that 1ML is a
                 user-friendly surface syntax for System $F_\omega$ that
allows combining term and type abstraction in a more
compositional manner than the bare calculus.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Vazou:2015:BRT,
author = "Niki Vazou and Alexander Bakst and Ranjit Jhala",
title = "Bounded refinement types",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "48--61",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784745",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a notion of bounded quantification for
refinement types and show how it expands the
expressiveness of refinement typing by using it to
develop typed combinators for: (1) relational algebra
and safe database access, (2) Floyd-Hoare logic within
a state transformer monad equipped with combinators for
branching and looping, and (3) using the above to
implement a refined IO monad that tracks capabilities
and resource usage. This leap in expressiveness comes
via a translation to ``ghost'' functions, which lets us
retain the automated and decidable SMT based checking
and inference that makes refinement typing effective in
practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Matsuda:2015:ABP,
author = "Kazutaka Matsuda and Meng Wang",
title = "Applicative bidirectional programming with lenses",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "62--74",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784750",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A bidirectional transformation is a pair of mappings
between source and view data objects, one in each
direction. When the view is modified, the source is
updated accordingly with respect to some laws. One way
to reduce the development and maintenance effort of
bidirectional transformations is to have specialized
languages in which the resulting programs are
bidirectional by construction---giving rise to the
paradigm of bidirectional programming. In this paper,
we develop a framework for applicative-style and
higher-order bidirectional programming, in which we can
write bidirectional transformations as unidirectional
programs in standard functional languages, opening up
access to the bundle of language features previously
only available to conventional unidirectional
languages. Our framework essentially bridges two very
                 different approaches to bidirectional programming,
                 namely the lens framework and Voigtl{\"a}nder's
                 semantic bidirectionalization, creating a new
                 programming style that reaps the benefits of both.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Pombrio:2015:HRC,
author = "Justin Pombrio and Shriram Krishnamurthi",
title = "Hygienic resugaring of compositional desugaring",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "75--87",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784755",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Syntactic sugar is widely used in language
implementation. Its benefits are, however, offset by
the comprehension problems it presents to programmers
once their program has been transformed. In particular,
after a transformed program has begun to evaluate (or
otherwise be altered by a black-box process), it can
become unrecognizable. We present a new approach to
_resugaring_ programs, which is the act of reflecting
evaluation steps in the core language in terms of the
syntactic sugar that the programmer used. Relative to
prior work, our approach has two important advances: it
handles hygiene, and it allows almost arbitrary
rewriting rules (as opposed to restricted patterns). We
do this in the context of a DAG representation of
programs, rather than more traditional trees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Geneves:2015:XST,
author = "Pierre Genev{\`e}s and Nils Gesbert",
title = "{XQuery} and static typing: tackling the problem of
backward axes",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "88--100",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784746",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "XQuery is a functional language dedicated to XML data
querying and manipulation. As opposed to other
W3C-standardized languages for XML (e.g. XSLT), it has
been intended to feature strong static typing.
Currently, however, some expressions of the language
cannot be statically typed with any precision. We argue
that this is due to a discrepancy between the semantics
of the language and its type algebra: namely, the
values of the language are (possibly inner) tree nodes,
which may have siblings and ancestors in the data. The
types on the other hand are regular tree types, as
usual in the XML world: they describe sets of trees.
The type associated to a node then corresponds to the
subtree whose root is that node and contains no
information about the rest of the data. This makes
navigation expressions using `backward axes,' which
return e.g. the siblings of a node, impossible to type.
We discuss how to handle this discrepancy by improving
the type system. We describe a logic-based language of
extended types able to represent inner tree nodes and
show how it can dramatically increase the precision of
typing for navigation expressions. We describe how
inclusion between these extended types and the
classical regular tree types can be decided, allowing a
hybrid system combining both type languages. The result
is a net increase in precision of typing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Bowman:2015:NF,
author = "William J. Bowman and Amal Ahmed",
title = "Noninterference for free",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "101--113",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784733",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The dependency core calculus (DCC) is a framework for
studying a variety of dependency analyses (e.g., secure
information flow). The key property provided by DCC is
noninterference, which guarantees that a low-level
observer (attacker) cannot distinguish high-level
(protected) computations. The proof of noninterference
for DCC suggests a connection to parametricity in
System F, which suggests that it should be possible to
implement dependency analyses in languages with
                 parametric polymorphism. We present a translation from
                 DCC into $F_\omega$ and prove that the translation
                 preserves noninterference. To express noninterference
                 in $F_\omega$, we define a notion of observer-sensitive
equivalence that makes essential use of both
first-order and higher-order polymorphism. Our
translation provides insights into DCC's type system
and shows how DCC can be implemented in a polymorphic
language without loss of the noninterference (security)
guarantees available in DCC. Our contributions include
proof techniques that should be valuable when proving
other secure compilation or full abstraction results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Gaboardi:2015:ACL,
author = "Marco Gaboardi and Romain P{\'e}choux",
title = "Algebras and coalgebras in the light affine {Lambda}
calculus",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "114--126",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784759",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Algebra and coalgebra are widely used to model data
types in functional programming languages and proof
                 assistants. Their use permits better structuring of
                 computations and also enhances the expressivity of a
                 language or of a proof system. Interestingly,
parametric polymorphism {\`a} la System F provides a
way to encode algebras and coalgebras in strongly
normalizing languages without losing the good logical
properties of the calculus. Even if these encodings are
sometimes unsatisfying because they provide only
limited forms of algebras and coalgebras, they give
insights on the expressivity of System F in terms of
functions that we can program in it. With the goal of
contributing to a better understanding of the
expressivity of Implicit Computational Complexity
systems, we study the problem of defining algebras and
coalgebras in the Light Affine Lambda Calculus, a
system characterizing the complexity class FPTIME. This
system limits the computational complexity of programs
but it also limits the ways we can use parametric
polymorphism, and in general the way we can write our
programs. We show here that while the restrictions
imposed by the Light Affine Lambda Calculus pose some
                 issues for the standard System F encodings, they
                 still permit encoding some form of algebra and
                 coalgebra.
Using the algebra encoding one can define in the Light
Affine Lambda Calculus the traditional inductive types.
Unfortunately, the corresponding coalgebra encoding
permits only a very limited form of coinductive data
types. To extend this class we study an extension of
the Light Affine Lambda Calculus by distributive laws
                 for the modality {\S}. This extension has been discussed
but not studied before.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Downen:2015:SSR,
author = "Paul Downen and Philip Johnson-Freyd and Zena M.
Ariola",
title = "Structures for structural recursion",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "127--139",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784762",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Our goal is to develop co-induction from our
understanding of induction, putting them on level
ground as equal partners for reasoning about programs.
We investigate several structures which represent
well-founded forms of recursion in programs. These
simple structures encapsulate reasoning by primitive
                 and Noetherian induction principles, and can be
composed together to form complex recursion schemes for
programs operating over a wide class of data and
co-data types. At its heart, this study is guided by
duality: each structure for recursion has a dual form,
giving perfectly symmetric pairs of equal and opposite
data and co-data types for representing recursion in
programs. Duality is brought out through a framework
presented in sequent style, which inherently includes
control effects that are interpreted logically as
classical reasoning principles. To accommodate the
presence of effects, we give a calculus parameterized
by a notion of strategy, which is strongly normalizing
for a wide range of strategies. We also present a more
traditional calculus for representing effect-free
functional programs, but at the cost of losing some of
the founding dualities.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Danner:2015:DCS,
author = "Norman Danner and Daniel R. Licata and Ramyaa Ramyaa",
title = "Denotational cost semantics for functional languages
with inductive types",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "140--151",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784749",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A central method for analyzing the asymptotic
complexity of a functional program is to extract and
then solve a recurrence that expresses evaluation cost
in terms of input size. The relevant notion of input
size is often specific to a datatype, with measures
including the length of a list, the maximum element in
a list, and the height of a tree. In this work, we give
a formal account of the extraction of cost and size
recurrences from higher-order functional programs over
inductive datatypes. Our approach allows a wide range
of programmer-specified notions of size, and ensures
that the extracted recurrences correctly predict
evaluation cost. To extract a recurrence from a
program, we first make costs explicit by applying a
monadic translation from the source language to a
complexity language, and then abstract datatype values
as sizes. Size abstraction can be done semantically,
working in models of the complexity language, or
syntactically, by adding rules to a preorder judgement.
We give several different models of the complexity
language, which support different notions of size.
Additionally, we prove by a logical relations argument
that recurrences extracted by this process are upper
bounds for evaluation cost; the proof is entirely
syntactic and therefore applies to all of the models we
consider.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Avanzini:2015:ACF,
author = "Martin Avanzini and Ugo {Dal Lago} and Georg Moser",
title = "Analysing the complexity of functional programs:
higher-order meets first-order",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "152--164",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784753",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We show how the complexity of higher-order functional
programs can be analysed automatically by applying
                 program transformations to defunctionalised versions
                 of them, and feeding the result to existing tools for
the complexity analysis of first-order term rewrite
systems. This is done while carefully analysing
complexity preservation and reflection of the employed
transformations such that the complexity of the
obtained term rewrite system reflects on the complexity
of the initial program. Further, we describe suitable
strategies for the application of the studied
transformations and provide ample experimental data for
assessing the viability of our method.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Sheeran:2015:FPH,
author = "Mary Sheeran",
title = "Functional programming and hardware design: still
interesting after all these years",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "165--165",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2789053",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Higher order functions provide an elegant way to
express algorithms designed for implementation in
hardware. By showing examples of both classic and new
algorithms, I will explain why higher order functions
deserve to be studied. Next, I will consider the extent
to which ideas from functional programming, and
associated formal verification methods, have influenced
hardware design in practice. What can we learn from
looking back? You might ask ``Why are methods of
hardware design still important to our community?''.
Maybe we should just give up? One reason for not giving
up is that hardware design is really a form of parallel
programming. And here there is still a lot to do!
Inspired by Blelloch's wonderful invited talk at ICFP
2010, I still believe that functional programming has
much to offer in the central question of how to program
the parallel machines of today, and, more particularly,
of the future. I will briefly present some of the areas
where I think that we are poised to make great
contributions. But maybe we need to work harder on
getting our act together?",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Neis:2015:PCV,
author = "Georg Neis and Chung-Kil Hur and Jan-Oliver Kaiser and
Craig McLaughlin and Derek Dreyer and Viktor
Vafeiadis",
title = "{Pilsner}: a compositionally verified compiler for a
higher-order imperative language",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "166--178",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784764",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compiler verification is essential for the
construction of fully verified software, but most prior
work (such as CompCert) has focused on verifying
whole-program compilers. To support separate
compilation and to enable linking of results from
different verified compilers, it is important to
develop a compositional notion of compiler correctness
that is modular (preserved under linking), transitive
(supports multi-pass compilation), and flexible
(applicable to compilers that use different
intermediate languages or employ non-standard program
transformations). In this paper, building on prior work
of Hur et al., we develop a novel approach to
compositional compiler verification based on parametric
inter-language simulations (PILS). PILS are modular:
they enable compiler verification in a manner that
supports separate compilation. PILS are transitive: we
use them to verify Pilsner, a simple (but non-trivial)
multi-pass optimizing compiler (programmed in Coq) from
an ML-like source language S to an assembly-like target
language T, going through a CPS-based intermediate
language. Pilsner is the first multi-pass compiler for
a higher-order imperative language to be
compositionally verified. Lastly, PILS are flexible: we
use them to additionally verify (1) Zwickel, a direct
non-optimizing compiler for S, and (2) a hand-coded
self-modifying T module, proven correct w.r.t. an
S-level specification. The output of Zwickel and the
self-modifying T module can then be safely linked
together with the output of Pilsner. All together, this
has been a significant undertaking, involving several
person-years of work and over 55,000 lines of Coq.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Ziliani:2015:UAC,
author = "Beta Ziliani and Matthieu Sozeau",
title = "A unification algorithm for {Coq} featuring universe
polymorphism and overloading",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "179--191",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784751",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Unification is a core component of every proof
assistant or programming language featuring dependent
types. In many cases, it must deal with higher-order
problems up to conversion. Since unification in such
conditions is undecidable, unification algorithms may
include several heuristics to solve common problems.
However, when the stack of heuristics grows large, the
result and complexity of the algorithm can become
unpredictable. Our contributions are twofold: (1) We
present a full description of a new unification
algorithm for the Calculus of Inductive Constructions
(the base logic of Coq), including universe
polymorphism, canonical structures (the overloading
mechanism baked into Coq's unification), and a small
set of useful heuristics. (2) We implemented our
algorithm, and tested it on several libraries,
providing evidence that the selected set of heuristics
suffices for large developments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Blanchette:2015:FEC,
author = "Jasmin Christian Blanchette and Andrei Popescu and
Dmitriy Traytel",
title = "Foundational extensible corecursion: a proof assistant
perspective",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "192--204",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784732",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a formalized framework for
defining corecursive functions safely in a total
setting, based on corecursion up-to and relational
parametricity. The end product is a general corecursor
that allows corecursive (and even recursive) calls
under ``friendly'' operations, including constructors.
Friendly corecursive functions can be registered as
such, thereby increasing the corecursor's
expressiveness. The metatheory is formalized in the
Isabelle proof assistant and forms the core of a
prototype tool. The corecursor is derived from first
principles, without requiring new axioms or extensions
of the logic.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Steuwer:2015:GPP,
author = "Michel Steuwer and Christian Fensch and Sam Lindley
and Christophe Dubach",
title = "Generating performance portable code using rewrite
rules: from high-level functional expressions to
high-performance {OpenCL} code",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "205--217",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784754",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computers have become increasingly complex with the
emergence of heterogeneous hardware combining multicore
CPUs and GPUs. These parallel systems exhibit
tremendous computational power at the cost of increased
programming effort resulting in a tension between
performance and code portability. Typically, code is
either tuned in a low-level imperative language using
hardware-specific optimizations to achieve maximum
performance or is written in a high-level, possibly
functional, language to achieve portability at the
expense of performance. We propose a novel approach
aiming to combine high-level programming, code
portability, and high-performance. Starting from a
high-level functional expression we apply a simple set
of rewrite rules to transform it into a low-level
functional representation, close to the OpenCL
programming model, from which OpenCL code is generated.
Our rewrite rules define a space of possible
implementations which we automatically explore to
generate hardware-specific OpenCL implementations. We
formalize our system with a core dependently-typed
lambda-calculus along with a denotational semantics
which we use to prove the correctness of the rewrite
rules. We test our design in practice by implementing a
compiler which generates high performance imperative
OpenCL code. Our experiments show that we can
automatically derive hardware-specific implementations
from simple functional high-level algorithmic
expressions offering performance on a par with highly
tuned code for multicore CPUs and GPUs written by
experts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Newton:2015:ALF,
author = "Ryan R. Newton and Peter P. Fogg and Ali Varamesh",
title = "Adaptive lock-free maps: purely-functional to
scalable",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "218--229",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784734",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Purely functional data structures stored inside a
mutable variable provide an excellent concurrent data
                 structure --- obviously correct, cheap to create, and
supporting snapshots. They are not, however, scalable.
We provide a way to retain the benefits of these
pure-in-a-box data structures while dynamically
converting to a more scalable lock-free data structure
under contention. Our solution scales to any pair of
pure and lock-free container types with key/value set
semantics, while retaining lock-freedom. We demonstrate
the principle in action on two very different
platforms: first in the Glasgow Haskell Compiler and
second in Java. To this end we extend GHC to support
lock-free data structures and introduce a new approach
for safe CAS in a lazy language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
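%% Editorial note: the ``pure-in-a-box'' baseline described in the
%% abstract above is easy to sketch in plain Haskell: a persistent map
%% held in one mutable cell, with O(1) snapshots. This shows only the
%% unscalable starting point, not the paper's adaptive conversion to a
%% lock-free structure; names below are illustrative, not the paper's.
%%
%%   import Data.IORef (IORef, newIORef, readIORef, atomicModifyIORef')
%%   import qualified Data.Map.Strict as Map
%%
%%   -- A purely functional map in a mutable box: obviously correct and
%%   -- cheap to create; however, every writer contends on the one cell,
%%   -- which is exactly the scalability problem the paper then solves.
%%   type PureInABox k v = IORef (Map.Map k v)
%%
%%   newBox :: IO (PureInABox k v)
%%   newBox = newIORef Map.empty
%%
%%   insert :: Ord k => k -> v -> PureInABox k v -> IO ()
%%   insert k v box = atomicModifyIORef' box (\m -> (Map.insert k v m, ()))
%%
%%   snapshot :: PureInABox k v -> IO (Map.Map k v)
%%   snapshot = readIORef  -- O(1): the persistent map never mutates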
@Article{Le:2015:PAT,
author = "Matthew Le and Matthew Fluet",
title = "Partial aborts for transactions via first-class
continuations",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "230--242",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784736",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software transactional memory (STM) has proven to be a
useful abstraction for developing concurrent
applications, where programmers denote transactions
with an atomic construct that delimits a collection of
reads and writes to shared mutable references. The
runtime system then guarantees that all transactions
are observed to execute atomically with respect to each
other. Traditionally, when the runtime system detects
that one transaction conflicts with another, it aborts
one of the transactions and restarts its execution from
the beginning. This can lead to problems with both
execution time and throughput. In this paper, we
present a novel approach that uses first-class
continuations to restart a conflicting transaction at
the point of a conflict, avoiding the re-execution of
any work from the beginning of the transaction that has
not been compromised. In practice, this allows
transactions to complete more quickly, decreasing
execution time and increasing throughput. We have
implemented this idea in the context of the Manticore
project, an ML-family language with support for
parallelism and concurrency. Crucially, we rely on
constant-time continuation capturing via a
continuation-passing-style (CPS) transformation and
heap-allocated continuations. When comparing our STM
that performs partial aborts against one that performs
full aborts, we achieve a decrease in execution time of
up to 31\% and an increase in throughput of up to
351\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Scherer:2015:WST,
author = "Gabriel Scherer and Didier R{\'e}my",
title = "Which simple types have a unique inhabitant?",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "243--255",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784757",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study the question of whether a given type has a
unique inhabitant modulo program equivalence. In the
setting of simply-typed lambda-calculus with sums,
                 equipped with the strong $\beta\eta$-equivalence, we
                 show that
uniqueness is decidable. We present a saturating
focused logic that introduces irreducible cuts on
positive types ``as soon as possible''. Backward search
in this logic gives an effective algorithm that returns
either zero, one or two distinct inhabitants for any
given type. Preliminary application studies show that
such a feature can be useful in strongly-typed
programs, inferring the code of highly-polymorphic
library functions, or ``glue code'' inside more complex
terms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
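%% Editorial note: for intuition, the question transposes into
%% polymorphic Haskell (the paper itself works in the simply-typed
%% lambda-calculus with sums, so this is only an analogy): some types
%% pin down their code, others admit exactly two programs, which is
%% what the paper's algorithm detects and reports.
%%
%%   -- A type with a unique inhabitant (up to beta-eta): code of this
%%   -- type could be inferred from the type alone.
%%   only :: a -> a
%%   only x = x
%%
%%   -- A type with exactly two distinct inhabitants; a search like the
%%   -- paper's would return both, flagging the ambiguity.
%%   keepFirst, keepSecond :: a -> a -> a
%%   keepFirst  x _ = x
%%   keepSecond _ y = y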
@Article{Dunfield:2015:EEO,
author = "Joshua Dunfield",
title = "Elaborating evaluation-order polymorphism",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "256--268",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784744",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We classify programming languages according to
evaluation order: each language fixes one evaluation
order as the default, making it transparent to program
in that evaluation order, and troublesome to program in
the other. This paper develops a type system that is
impartial with respect to evaluation order. Evaluation
order is implicit in terms, and explicit in types, with
by-value and by-name versions of type connectives. A
form of intersection type quantifies over evaluation
orders, describing code that is agnostic over (that is,
polymorphic in) evaluation order. By allowing such
generic code, programs can express the by-value and
by-name versions of a computation without code
duplication. We also formulate a type system that only
has by-value connectives, plus a type that generalizes
the difference between by-value and by-name
connectives: it is either a suspension (by name) or a
``no-op'' (by value). We show a straightforward
encoding of the impartial type system into the more
economical one. Then we define an elaboration from the
economical language to a call-by-value semantics, and
prove that elaborating a well-typed source program,
where evaluation order is implicit, produces a
well-typed target program where evaluation order is
explicit. We also prove a simulation between evaluation
of the target program and reductions (either by-value
or by-name) in the source program. Finally, we prove
that typing, elaboration, and evaluation are faithful
to the type annotations given in the source program: if
the programmer only writes by-value types, no by-name
reductions can occur at run time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
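%% Editorial note: the code duplication this abstract mentions can be
%% dramatized in Haskell by modelling the paper's ``suspension'' type
%% as an explicit thunk. Haskell is itself lazy, so this is only an
%% illustration of the call-by-value elaboration target, not of the
%% paper's type system; all names here are made up.
%%
%%   -- By-value connective: arguments are (conceptually) evaluated
%%   -- before the function is applied.
%%   firstV :: a -> a -> a
%%   firstV x _ = x
%%
%%   -- By-name connective: arguments arrive suspended and are forced
%%   -- only when needed. Without evaluation-order polymorphism, both
%%   -- versions must be written by hand.
%%   type Susp a = () -> a
%%
%%   firstN :: Susp a -> Susp a -> a
%%   firstN x _ = x ()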
@Article{Rendel:2015:ARL,
author = "Tillmann Rendel and Julia Trieflinger and Klaus
Ostermann",
title = "Automatic refunctionalization to a language with
copattern matching: with applications to the expression
problem",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "269--279",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784763",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "Defunctionalization and refunctionalization establish
a correspondence between first-class functions and
pattern matching, but the correspondence is not
symmetric: Not all uses of pattern matching can be
automatically refunctionalized to uses of higher-order
functions. To remedy this asymmetry, we generalize from
first-class functions to arbitrary codata. This leads
us to full defunctionalization and refunctionalization
between a codata language based on copattern matching
and a data language based on pattern matching. We
observe how programs can be written as matrices so that
they are modularly extensible in one dimension but not
the other. In this representation, defunctionalization
and refunctionalization correspond to matrix
transposition which effectively changes the dimension
of extensibility a program supports. This suggests
applications to the expression problem.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
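%% Editorial note: Reynolds-style defunctionalization, this paper's
%% starting point, turns each lambda into a constructor of a
%% first-order datatype plus an apply function; refunctionalization is
%% the inverse. A textbook-style Haskell sketch (not the paper's
%% copattern formulation):
%%
%%   -- Higher-order original: functions are first class.
%%   composeHO :: (Int -> Int) -> (Int -> Int) -> Int -> Int
%%   composeHO f g = \x -> f (g x)
%%
%%   -- Defunctionalized: one constructor per lambda, plus 'apply'.
%%   -- Matching on 'Fun' is extensible in new operations but not in
%%   -- new function shapes; the higher-order version is the transpose,
%%   -- extensible the other way -- hence the expression problem.
%%   data Fun = AddOne | Double | Compose Fun Fun
%%
%%   apply :: Fun -> Int -> Int
%%   apply AddOne        n = n + 1
%%   apply Double        n = 2 * n
%%   apply (Compose f g) n = apply f (apply g n)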
@Article{Russo:2015:FPT,
author = "Alejandro Russo",
title = "Functional pearl: two can keep a secret, if one of
them uses {Haskell}",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "280--288",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784756",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For several decades, researchers from different
communities have independently focused on protecting
confidentiality of data. Two distinct technologies have
emerged for such purposes: Mandatory Access Control
                 (MAC) and Information-Flow Control (IFC) --- the
                 former
belonging to operating systems (OS) research, while the
latter to the programming languages community. These
approaches restrict how data gets propagated within a
system in order to avoid information leaks. In this
scenario, Haskell plays a unique privileged role: it is
able to protect confidentiality via libraries. This
pearl presents a monadic API which statically protects
confidentiality even in the presence of advanced
features like exceptions, concurrency, and mutable data
structures. Additionally, we present a mechanism to
safely extend the library with new primitives, where
library designers only need to indicate the read and
write effects of new operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Buiras:2015:HMS,
author = "Pablo Buiras and Dimitrios Vytiniotis and Alejandro
Russo",
title = "{HLIO}: mixing static and dynamic typing for
information-flow control in {Haskell}",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "289--301",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784758",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Information-Flow Control (IFC) is a well-established
approach for allowing untrusted code to manipulate
sensitive data without disclosing it. IFC is typically
enforced via type systems and static analyses or via
dynamic execution monitors. The LIO Haskell library,
originating in operating systems research, implements a
purely dynamic monitor of the sensitivity level of a
computation, particularly suitable when data
sensitivity levels are only known at runtime. In this
paper, we show how to give programmers the flexibility
of deferring IFC checks to runtime (as in LIO), while
also providing static guarantees---and the absence of
runtime checks---for parts of their programs that can
be statically verified (unlike LIO). We present the
design and implementation of our approach, HLIO (Hybrid
LIO), as an embedding in Haskell that uses a novel
technique for deferring IFC checks based on singleton
types and constraint polymorphism. We formalize HLIO,
prove non-interference, and show how interesting IFC
examples can be programmed. Although our motivation is
IFC, our technique for deferring constraints goes well
beyond and offers a methodology for
programmer-controlled hybrid type checking in
Haskell.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{vanderPloeg:2015:PPF,
author = "Atze van der Ploeg and Koen Claessen",
title = "Practical principled {FRP}: forget the past, change
the future, {FRPNow}!",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "302--314",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784752",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new interface for practical Functional
Reactive Programming (FRP) that (1) is close in spirit
to the original FRP ideas, (2) does not have the
original space-leak problems, without using arrows or
advanced types, and (3) provides a simple and
expressive way for performing IO actions from FRP code.
We also provide a denotational semantics for this new
interface, and a technique (using Kripke logical
relations) for reasoning about which FRP functions may
``forget their past'', i.e. which functions do not have
an inherent space-leak. Finally, we show how we have
implemented this interface as a Haskell library called
FRPNow.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Bahr:2015:CSM,
author = "Patrick Bahr and Jost Berthold and Martin Elsman",
title = "Certified symbolic management of financial multi-party
contracts",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "315--327",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784747",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Domain-specific languages (DSLs) for complex financial
contracts are in practical use in many banks and
financial institutions today. Given the level of
automation and pervasiveness of software in the sector,
the financial domain is immensely sensitive to software
bugs. At the same time, there is an increasing need to
analyse (and report on) the interaction between
multiple parties. In this paper, we present a
multi-party contract language that rigorously relegates
any artefacts of simulation and computation from its
core, which leads to favourable algebraic properties,
and therefore allows for formalising domain-specific
analyses and transformations using a proof assistant.
At the centre of our formalisation is a simple
denotational semantics independent of any stochastic
aspects. Based on this semantics, we devise certified
contract analyses and transformations. In particular,
we give a type system, with an accompanying type
inference procedure, that statically ensures that
contracts follow the principle of causality. Moreover,
we devise a reduction semantics that allows us to
evolve contracts over time, in accordance with the
denotational semantics. From the verified Coq
definitions, we automatically extract a Haskell
implementation of an embedded contract DSL along with
the formally verified contract management
functionality. This approach opens a road map towards
more reliable contract management software, including
the possibility of analysing contracts based on
symbolic instead of numeric methods.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
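%% Editorial note: for readers unfamiliar with contract DSLs, a toy
%% Haskell sketch in the compositional style this line of work builds
%% on; the paper's language is multi-party and certified in Coq, and
%% all constructor names here are invented.
%%
%%   data Party    = Party String          deriving Show
%%   data Currency = EUR | USD             deriving Show
%%
%%   data Contract
%%     = Zero                                   -- no obligations
%%     | Transfer Party Party Currency Double   -- a single payment
%%     | Both Contract Contract                 -- conjunction
%%     | Delay Int Contract                     -- active in n days
%%     deriving Show
%%
%%   -- Evolving a contract by one day, in the spirit of the paper's
%%   -- reduction semantics.
%%   advance :: Contract -> Contract
%%   advance (Delay n c) | n > 1     = Delay (n - 1) c
%%                       | otherwise = c
%%   advance (Both a b)              = Both (advance a) (advance b)
%%   advance c                       = c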
@Article{Smolka:2015:FCN,
author = "Steffen Smolka and Spiridon Eliopoulos and Nate Foster
and Arjun Guha",
title = "A fast compiler for {NetKAT}",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "328--341",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784761",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "High-level programming languages play a key role in a
growing number of networking platforms, streamlining
application development and enabling precise formal
reasoning about network behavior. Unfortunately,
current compilers only handle ``local'' programs that
specify behavior in terms of hop-by-hop forwarding
behavior, or modest extensions such as simple paths. To
encode richer ``global'' behaviors, programmers must
add extra state --- something that is tricky to get
right and makes programs harder to write and maintain.
Making matters worse, existing compilers can take tens
of minutes to generate the forwarding state for the
network, even on relatively small inputs. This forces
programmers to waste time working around performance
issues or even revert to using hardware-level APIs.
This paper presents a new compiler for the NetKAT
language that handles rich features including regular
paths and virtual networks, and yet is several orders
of magnitude faster than previous compilers. The
compiler uses symbolic automata to calculate the extra
state needed to implement ``global'' programs, and an
intermediate representation based on binary decision
diagrams to dramatically improve performance. We
describe the design and implementation of three
essential compiler stages: from virtual programs (which
specify behavior in terms of virtual topologies) to
global programs (which specify network-wide behavior in
terms of physical topologies), from global programs to
local programs (which specify behavior in terms of
single-switch behavior), and from local programs to
hardware-level forwarding tables. We present results
from experiments on real-world benchmarks that quantify
performance in terms of compilation time and forwarding
table size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Stucki:2015:RVP,
author = "Nicolas Stucki and Tiark Rompf and Vlad Ureche and
Phil Bagwell",
title = "{RRB} vector: a practical general purpose immutable
sequence",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "342--354",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784739",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "State-of-the-art immutable collections have wildly
differing performance characteristics across their
operations, often forcing programmers to choose
different collection implementations for each task.
Thus, changes to the program can invalidate the choice
of collections, making code evolution costly. It would
be desirable to have a collection that performs well
for a broad range of operations. To this end, we
present the RRB-Vector, an immutable sequence
collection that offers good performance across a large
number of sequential and parallel operations. The
underlying innovations are: (1) the
Relaxed-Radix-Balanced (RRB) tree structure, which
allows efficient structural reorganization, and (2) an
optimization that exploits spatio-temporal locality on
the RRB data structure in order to offset the cost of
traversing the tree. In our benchmarks, the RRB-Vector
speedup for parallel operations is lower bounded by 7x
when executing on 4 CPUs of 8 cores each. The
performance for discrete operations, such as appending
on either end, or updating and removing elements, is
consistently good and compares favorably to the most
important immutable sequence collections in the
literature and in use today. The memory footprint of
RRB-Vector is on par with arrays and an order of
magnitude less than competing collections.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
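%% Editorial note: the ``radix-balanced'' half of the RRB structure is
%% simple to state: with 32-way branching, level h of the tree is
%% addressed by bits 5h..5h+4 of the index. A sketch of lookup on the
%% fully balanced case (the paper's relaxed nodes additionally carry
%% size tables, omitted here; names are illustrative):
%%
%%   import Data.Bits (shiftR, (.&.))
%%
%%   data Tree a = Leaf [a] | Node [Tree a]
%%
%%   -- h is the height of the current node; five bits of the index
%%   -- select a child at each level, so lookup needs no searching on
%%   -- balanced subtrees.
%%   lookupRadix :: Int -> Int -> Tree a -> a
%%   lookupRadix _ i (Leaf xs) = xs !! (i .&. 31)
%%   lookupRadix h i (Node ts) =
%%     lookupRadix (h - 1) i (ts !! ((i `shiftR` (5 * h)) .&. 31))
%%
%% A call 'lookupRadix height i root' then descends one node per level.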
@Article{Jaskelioff:2015:FPS,
author = "Mauro Jaskelioff and Exequiel Rivas",
title = "Functional pearl: a smart view on datatypes",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "355--361",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784743",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Left-nested list concatenations, left-nested binds on
the free monad, and left-nested choices in many
non-determinism monads have an algorithmically bad
performance. Can we solve this problem without losing
the ability to pattern-match on the computation?
Surprisingly, there is a deceptively simple solution:
use a smart view to pattern-match on the datatype. We
introduce the notion of smart view and show how it
solves the problem of slow left-nested operations. In
particular, we use the technique to obtain fast and
simple implementations of lists, of free monads, and of
two non-determinism monads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
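%% Editorial note: the problem and the shape of the fix are easy to
%% show for lists: left-nested (++) is quadratic, append trees make
%% appends O(1), and a view restores pattern matching on the tree
%% representation. A rough Haskell sketch of the idea (the paper
%% develops smart views in general, also for free monads):
%%
%%   -- O(1) append: build a node instead of retraversing.
%%   data AList a = Nil | Single a | AList a :++: AList a
%%
%%   data View a = Empty | Cons a (AList a)
%%
%%   -- Pattern-match through the view instead of on the raw tree.
%%   view :: AList a -> View a
%%   view Nil        = Empty
%%   view (Single x) = Cons x Nil
%%   view (l :++: r) = case view l of
%%     Empty     -> view r
%%     Cons x l' -> Cons x (l' :++: r)
%%
%%   toList :: AList a -> [a]
%%   toList t = case view t of
%%     Empty     -> []
%%     Cons x t' -> x : toList t'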
@Article{Yang:2015:ECC,
author = "Edward Z. Yang and Giovanni Campagna and {\"O}mer S.
Agacan and Ahmed El-Hassany and Abhishek Kulkarni and
Ryan R. Newton",
title = "Efficient communication and collection with compact
normal forms",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "362--374",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784735",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In distributed applications, the transmission of
non-contiguous data structures is greatly slowed down
by the need to serialize them into a buffer before
sending. We describe Compact Normal Forms, an API that
allows programmers to explicitly place immutable heap
objects into regions, which can both be accessed like
ordinary data as well as efficiently transmitted over
the network. The process of placing objects into
compact regions (essentially a copy) is faster than any
serializer and can be amortized over a series of
functional updates to the data structure in question.
We implement this scheme in the Glasgow Haskell
Compiler and show that even with the space expansion
attendant with memory-oriented data structure
                 representations, we achieve between $2\times$ and
                 $4\times$ speedups
on fast local networks with sufficiently large data
structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
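%% Editorial note: this work is the basis of GHC's compact regions,
%% available as the ghc-compact package from GHC 8.2 onwards. A minimal
%% usage sketch, assuming that package is installed:
%%
%%   import GHC.Compact (compact, getCompact)
%%
%%   main :: IO ()
%%   main = do
%%     let table = [(i, show i) | i <- [1 .. 100000 :: Int]]
%%     region <- compact table             -- one-time copy into a region
%%     print (length (getCompact region))  -- accessed like ordinary data
%%
%% The region is garbage-collected as a single unit and, per the paper,
%% can be sent over the network without a separate serialization pass.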
@Article{Keil:2015:BAH,
author = "Matthias Keil and Peter Thiemann",
title = "Blame assignment for higher-order contracts with
intersection and union",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "375--386",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784737",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an untyped calculus of blame assignment for
a higher-order contract system with two new operators:
intersection and union. The specification of these
operators is based on the corresponding type theoretic
constructions. This connection makes intersection and
union contracts their inevitable dynamic counterparts
with a range of desirable properties and makes them
suitable for subsequent integration in a gradual type
system. A denotational specification provides the
semantics of a contract in terms of two sets: a set of
terms satisfying the contract and a set of contexts
respecting the contract. This kind of specification for
contracts is novel and interesting in its own right. A
nondeterministic operational semantics serves as the
specification for contract monitoring and for proving
its correctness. It is complemented by a deterministic
semantics that is closer to an implementation and that
is connected to the nondeterministic semantics by
simulation. The calculus is the formal basis of TJS, a
language embedded, higher-order contract system
implemented for JavaScript.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Swords:2015:ECM,
author = "Cameron Swords and Amr Sabry and Sam Tobin-Hochstadt",
title = "Expressing contract monitors as patterns of
communication",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "387--399",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784742",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new approach to contract semantics which
expresses myriad monitoring strategies using a small
core of foundational communication primitives. This
approach allows multiple existing contract monitoring
approaches, ranging from Findler and Felleisen's
original model of higher-order contracts to semi-eager,
parallel, or asynchronous monitors, to be expressed in
a single language built on well-understood constructs.
We prove that this approach accurately simulates the
original semantics of higher-order contracts. A
straightforward implementation in Racket demonstrates
the practicality of our approach which not only
enriches existing Racket monitoring strategies, but
                 also supports a new style of monitoring in which
collections of contracts collaborate to establish a
global invariant.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Zhu:2015:LRT,
author = "He Zhu and Aditya V. Nori and Suresh Jagannathan",
title = "Learning refinement types",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "400--411",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784766",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose the integration of a random test generation
system (capable of discovering program bugs) and a
refinement type system (capable of expressing and
verifying program invariants), for higher-order
functional programs, using a novel lightweight learning
algorithm as an effective intermediary between the two.
Our approach is based on the well-understood intuition
that useful, but difficult to infer, program properties
can often be observed from concrete program states
generated by tests; these properties act as likely
invariants, which if used to refine simple types, can
have their validity checked by a refinement type
checker. We describe an implementation of our technique
for a variety of benchmarks written in ML, and
demonstrate its effectiveness in inferring and proving
useful invariants for programs that express complex
higher-order control and dataflow.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Pavlinovic:2015:PSB,
author = "Zvonimir Pavlinovic and Tim King and Thomas Wies",
title = "Practical {SMT}-based type error localization",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "412--423",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784765",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compilers for statically typed functional programming
languages are notorious for generating confusing type
error messages. When the compiler detects a type error,
it typically reports the program location where the
type checking failed as the source of the error. Since
other error sources are not even considered, the actual
root cause is often missed. A more adequate approach is
to consider all possible error sources and report the
most useful one subject to some usefulness criterion.
In our previous work, we showed that this approach can
be formulated as an optimization problem related to
satisfiability modulo theories (SMT). This formulation
cleanly separates the heuristic nature of usefulness
criteria from the underlying search problem.
Unfortunately, algorithms that search for an optimal
error source cannot directly use principal types which
are crucial for dealing with the exponential-time
complexity of the decision problem of polymorphic type
checking. In this paper, we present a new algorithm
that efficiently finds an optimal error source in a
given ill-typed program. Our algorithm uses an improved
SMT encoding to cope with the high complexity of
polymorphic typing by iteratively expanding the typing
constraints from which principal types are derived. The
algorithm preserves the clean separation between the
heuristics and the actual search. We have implemented
our algorithm for OCaml. In our experimental
evaluation, we found that the algorithm reduces the
running times for optimal type error localization from
minutes to seconds and scales better than previous
localization algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
@Article{Karachalias:2015:GMT,
author = "Georgios Karachalias and Tom Schrijvers and Dimitrios
Vytiniotis and Simon Peyton Jones",
title = "{GADTs} meet their match: pattern-matching warnings
that account for {GADTs}, guards, and laziness",
journal = j-SIGPLAN,
volume = "50",
number = "9",
pages = "424--436",
month = sep,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858949.2784748",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "For ML and Haskell, accurate warnings when a function
definition has redundant or missing patterns are
mission critical. But today's compilers generate bogus
warnings when the programmer uses guards (even simple
ones), GADTs, pattern guards, or view patterns. We give
the first algorithm that handles all these cases in a
single, uniform framework, together with an
implementation in GHC, and evidence of its utility in
practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '15 conference proceedings.",
}
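%% Editorial note: this algorithm was implemented in GHC (landing
%% around GHC 8.0). The canonical bogus warning it eliminates: earlier
%% checkers flagged the missing 'VBool' case below even though the type
%% index rules it out.
%%
%%   {-# LANGUAGE GADTs #-}
%%
%%   data Val a where
%%     VInt  :: Int  -> Val Int
%%     VBool :: Bool -> Val Bool
%%
%%   -- Exhaustive: a 'VBool' pattern cannot have type 'Val Int', so a
%%   -- GADT-aware coverage checker must not warn here.
%%   addOne :: Val Int -> Int
%%   addOne (VInt n) = n + 1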
@Article{Hague:2015:DRC,
author = "Matthew Hague and Anthony W. Lin and C.-H. Luke Ong",
title = "Detecting redundant {CSS} rules in {HTML5}
applications: a tree rewriting approach",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "1--19",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814288",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "HTML5 applications normally have a large set of CSS
(Cascading Style Sheets) rules for data display. Each
CSS rule consists of a node selector and a declaration
block (which assigns values to selected nodes' display
attributes). As web applications evolve, maintaining
CSS files can easily become problematic. Some CSS rules
will be replaced by new ones, but these obsolete (hence
redundant) CSS rules often remain in the applications.
Not only does this ``bloat'' the applications ---
increasing the bandwidth requirement --- but it also
significantly increases web browsers' processing time.
Most works on detecting redundant CSS rules in HTML5
applications do not consider the dynamic behaviours of
HTML5 (specified in JavaScript); in fact, the only
proposed method that takes these into account is
dynamic analysis, which cannot soundly prove redundancy
of CSS rules. In this paper, we introduce an
abstraction of HTML5 applications based on monotonic
tree-rewriting and study its ``redundancy problem''. We
establish the precise complexity of the problem and
various subproblems of practical importance (ranging
from P to EXP). In particular, our algorithm relies on
an efficient reduction to an analysis of symbolic
pushdown systems (for which highly optimised solvers
are available), which yields a fast method for checking
redundancy in practice. We implemented our algorithm
and demonstrated its efficacy in detecting redundant
CSS rules in HTML5 applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Demsky:2015:SSD,
author = "Brian Demsky and Patrick Lam",
title = "{SATCheck}: {SAT}-directed stateless model checking
for {SC} and {TSO}",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "20--36",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814297",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Writing low-level concurrent code is well known to be
challenging and error prone. The widespread deployment
of multi-core hardware and the shift towards using
low-level concurrent data structures has moved the
problem into the mainstream. Finding bugs in such code
may require finding a specific bug-revealing thread
interleaving out of a huge space of parallel
executions. Model-checking is a powerful technique for
exhaustively testing code. However, scaling model
checking presents a significant challenge. In this
paper we present a new and more scalable technique for
model checking concurrent code, based on concrete
execution. Our technique observes concrete behaviors,
builds a model of these behaviors, encodes the model in
SAT, and leverages SAT solver technology to find
executions that reveal new behaviors. It then runs the
new execution, incorporates the newly observed
behavior, and repeats the process until it has explored
all reachable behaviors. We have implemented a
prototype of our approach in the SATCheck tool. Our
tool supports both the Total Store Ordering (TSO) and
Sequentially Consistent (SC) memory models. We evaluate
SATCheck by testing several concurrent data structure
implementations and comparing its performance to the
original DPOR stateless model checking algorithm
implemented in CDSChecker, the source DPOR algorithm
implemented in Nidhugg, and CheckFence. Our experiments
show that SATCheck scales better than previous
approaches while at the same time operating on concrete
executions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Kuraj:2015:PES,
author = "Ivan Kuraj and Viktor Kuncak and Daniel Jackson",
title = "Programming with enumerable sets of structures",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "37--56",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814323",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an efficient, modular, and feature-rich
framework for automated generation and validation of
complex structures, suitable for tasks that explore a
large space of structured values. Our framework is
capable of exhaustive, incremental, parallel, and
memoized enumeration from not only finite but also
infinite domains, while providing fine-grained control
over the process. Furthermore, the framework
efficiently supports the inverse of enumeration
(checking whether a structure can be generated and
fast-forwarding to this structure to continue the
enumeration) and lazy enumeration (achieving exhaustive
testing without generating all structures). The
foundation of efficient enumeration lies in both direct
access to encoded structures, achieved with well-known
and new pairing functions, and dependent enumeration,
which embeds constraints into the enumeration to avoid
backtracking. Our framework defines an algebra of
enumerators, with combinators for their composition
that preserve exhaustiveness and efficiency. We have
implemented our framework as a domain-specific language
in Scala. Our experiments demonstrate better
performance and shorter specifications by up to a few
orders of magnitude compared to existing approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
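%% Editorial note: the ``direct access to encoded structures'' that the
%% abstract mentions can be sketched by treating an enumerator as a
%% total indexing function and using the Cantor pairing function to
%% index products of infinite domains without backtracking. A Haskell
%% toy version of the Scala framework's core (finite cardinalities and
%% the dependent combinators are ignored; names are invented):
%%
%%   -- An enumerator: a (possibly infinite) indexed family of values.
%%   data Enumerator a =
%%     Enumerator { cardinality :: Maybe Integer  -- Nothing = infinite
%%                , at          :: Integer -> a }
%%
%%   nats :: Enumerator Integer
%%   nats = Enumerator Nothing id
%%
%%   -- Cantor unpairing: index n lands on diagonal w at offset y, so a
%%   -- product of infinite enumerators is again directly indexable.
%%   pairE :: Enumerator a -> Enumerator b -> Enumerator (a, b)
%%   pairE ea eb = Enumerator Nothing $ \n ->
%%     let w = (isqrt (8 * n + 1) - 1) `div` 2
%%         y = n - w * (w + 1) `div` 2
%%         x = w - y
%%     in (at ea x, at eb y)
%%     where
%%       isqrt :: Integer -> Integer  -- float sqrt is fine for a toy
%%       isqrt = floor . sqrt . (fromInteger :: Integer -> Double)
%%
%% For example, 'at (pairE nats nats) 7' evaluates to (2,1).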
@Article{Jensen:2015:SMC,
author = "Casper S. Jensen and Anders M{\o}ller and Veselin
Raychev and Dimitar Dimitrov and Martin Vechev",
title = "Stateless model checking of event-driven
applications",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "57--73",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814282",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern event-driven applications, such as, web pages
and mobile apps, rely on asynchrony to ensure smooth
end-user experience. Unfortunately, even though these
applications are executed by a single event-loop
thread, they can still exhibit nondeterministic
behaviors depending on the execution order of
interfering asynchronous events. As in classic
shared-memory concurrency, this nondeterminism makes it
challenging to discover errors that manifest only in
specific schedules of events. In this work we propose
the first stateless model checker for event-driven
applications, called R4. Our algorithm systematically
explores the nondeterminism in the application and
concisely exposes its overall effect, which is useful
for bug discovery. The algorithm builds on a
combination of three key insights: (i) a dynamic
partial order reduction (DPOR) technique for reducing
the search space, tailored to the domain of
event-driven applications, (ii) conflict-reversal
bounding based on a hypothesis that most errors occur
with a small number of event reorderings, and (iii)
approximate replay of event sequences, which is
critical for separating harmless from harmful
nondeterminism. We instantiate R4 for the domain of
client-side web applications and use it to analyze
event interference in a number of real-world programs.
The experimental results indicate that the precision
and overall exploration capabilities of our system
                 significantly exceed those of existing techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Hottelier:2015:SLE,
author = "Thibaud Hottelier and Rastislav Bodik",
title = "Synthesis of layout engines from relational
constraints",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "74--88",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814291",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an algorithm for synthesizing efficient
document layout engines from compact relational
specifications. These specifications are compact in
that a single specification can produce multiple
engines, each for a distinct layout situation, i.e., a
different combination of known vs. unknown attributes.
Technically, our specifications are relational
attribute grammars, while our engines are functional
attribute grammars. By synthesizing functions from
relational constraints, we obviate the need for
constraint solving at runtime, because functional
attribute grammars can be easily evaluated according to
a fixed schedule, sidestepping the backtracking search
performed by constraint solvers. Our experiments show
that we can generate layout engines for non-trivial
data visualizations, and that our synthesized engines
are between 39- and 200-times faster than
general-purpose constraint solvers. Relational
specifications of layout give rise to synthesis
problems that have previously proved intractable. Our
algorithm exploits the hierarchical, grammar-based
structure of the specification, decomposing the
specification into smaller subproblems, which can be
tackled with off-the-shelf synthesis procedures. The
new synthesis problem then becomes the composition of
the functions thus generated into a correct attribute
grammar, which might be recursive. We show how to solve
this problem by efficient reduction to an SMT
problem.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Erdweg:2015:SOI,
author = "Sebastian Erdweg and Moritz Lichter and Manuel Weiel",
title = "A sound and optimal incremental build system with
dynamic dependencies",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "89--106",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814316",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Build systems are used in all but the smallest
software projects to invoke the right build tools on
the right files in the right order. A build system must
be sound (after a build, generated files consistently
reflect the latest source files) and efficient (recheck
and rebuild as few build units as possible).
Contemporary build systems provide limited efficiency
because they lack support for expressing fine-grained
file dependencies. We present a build system called
pluto that supports the definition of reusable,
parameterized, interconnected builders. When run, a
builder notifies the build system about dynamically
required and produced files as well as about other
builders whose results are needed. To support
fine-grained file dependencies, we generalize the
traditional notion of time stamps to allow builders to
declare their actual requirements on a file's content.
pluto collects the requirements and products of a
builder with their stamps in a build summary. This
                 enables pluto to provide provably sound and optimal
incremental rebuilding. To support dynamic
dependencies, our rebuild algorithm interleaves
dependency analysis and builder execution and enforces
invariants on the dependency graph through a dynamic
analysis. We have developed pluto as a Java API and
used it to implement more than 25 builders. We describe
our experience with migrating a larger Ant build script
to pluto and compare the respective build times.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Polozov:2015:FFI,
author = "Oleksandr Polozov and Sumit Gulwani",
title = "{FlashMeta}: a framework for inductive program
synthesis",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "107--126",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814310",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Inductive synthesis, or programming-by-examples (PBE)
is gaining prominence with disruptive applications for
automating repetitive tasks in end-user programming.
However, designing, developing, and maintaining an
effective industrial-quality inductive synthesizer is
an intellectual and engineering challenge, requiring
                 1--2 man-years of effort. Our novel observation is that
many PBE algorithms are a natural fall-out of one
generic meta-algorithm and the domain-specific
properties of the operators in the underlying
domain-specific language (DSL). The meta-algorithm
propagates example-based constraints on an expression
to its subexpressions by leveraging associated witness
functions, which essentially capture the inverse
semantics of the underlying operator. This observation
enables a novel program synthesis methodology called
data-driven domain-specific deduction (D4), where
domain-specific insight, provided by the DSL designer,
is separated from the synthesis algorithm. Our
FlashMeta framework implements this methodology,
allowing synthesizer developers to generate an
efficient synthesizer from the mere DSL definition (if
properties of the DSL operators have been modeled). In
our case studies, we found that 10+ existing
industrial-quality mass-market applications based on
PBE can be cast as instances of D4. Our evaluation
includes reimplementation of some prior works, which in
FlashMeta become more efficient, maintainable, and
extensible. As a result, FlashMeta-based PBE tools are
deployed in several industrial products, including
Microsoft PowerShell 3.0 for Windows 10, Azure
Operational Management Suite, and Microsoft Cortana
digital assistant.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
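%% Editorial note: the witness-function idea at the heart of D4 is easy
%% to miniaturize: given an example-based constraint on an operator's
%% output, a witness function computes the constraints it induces on
%% the inputs by inverting the operator's semantics. A toy Haskell
%% rendering (FlashMeta itself is a .NET framework; names are
%% invented):
%%
%%   -- If concat(x, y) must produce 'out', then (x, y) must be one of
%%   -- the splits of 'out'; the meta-algorithm recurses on each.
%%   witnessConcat :: String -> [(String, String)]
%%   witnessConcat out = [ splitAt i out | i <- [0 .. length out] ]
%%
%%   -- witnessConcat "ab" == [("","ab"), ("a","b"), ("ab","")]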
@Article{Zhang:2015:SYB,
author = "Haoyuan Zhang and Zewei Chu and Bruno C. d. S.
Oliveira and Tijs van der Storm",
title = "Scrap your boilerplate with object algebras",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "127--146",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814279",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Traversing complex Abstract Syntax Trees (ASTs)
typically requires large amounts of tedious boilerplate
code. For many operations most of the code simply walks
the structure, and only a small portion of the code
implements the functionality that motivated the
traversal in the first place. This paper presents a
type-safe Java framework called Shy that removes much
of this boilerplate code. In Shy object algebras are
used to describe complex and extensible AST structures.
Using Java annotations Shy generates generic
boilerplate code for various types of traversals. For a
concrete traversal, users of Shy can then inherit from
the generated code and override only the interesting
cases. Consequently, the amount of code that users need
to write is significantly smaller. Moreover, traversals
using the Shy framework are also much more structure
shy, becoming more adaptive to future changes or
extensions to the AST structure. To prove the
effectiveness of the approach, we applied Shy in the
implementation of a domain-specific questionnaire
language. Our results show that for a large number of
traversals there was a significant reduction in the
amount of user-defined code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
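Object algebras themselves fit in a few lines of Java (a textbook encoding, independent of Shy's generated boilerplate): the abstract syntax is a generic factory interface, and each traversal is one implementation of it.

    // Object algebra interface: abstract syntax of a tiny
    // expression language, parameterized by the carrier type E.
    interface ExpAlg<E> {
        E lit(int n);
        E add(E l, E r);
    }

    // One traversal = one implementation of the algebra.
    class Eval implements ExpAlg<Integer> {
        public Integer lit(int n) { return n; }
        public Integer add(Integer l, Integer r) { return l + r; }
    }

    public class ObjectAlgebraDemo {
        // An AST is a generic builder over any algebra, so new
        // traversals need no changes to the structure.
        static <E> E example(ExpAlg<E> alg) {
            return alg.add(alg.lit(1), alg.lit(2));
        }

        public static void main(String[] args) {
            System.out.println(example(new Eval())); // prints 3
        }
    }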
@Article{Sharma:2015:CCS,
author = "Rahul Sharma and Eric Schkufza and Berkeley Churchill
and Alex Aiken",
title = "Conditionally correct superoptimization",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "147--162",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814278",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The aggressive optimization of heavily used kernels is
an important problem in high-performance computing.
However, both general-purpose compilers and highly
specialized tools such as superoptimizers often do not
have sufficient static knowledge of restrictions on
program inputs that could be exploited to produce the
very best code. For many applications, the best
possible code is conditionally correct: the optimized
kernel is equal to the code that it replaces only under
certain preconditions on the kernel's inputs. The main
technical challenge in producing conditionally correct
optimizations is in obtaining non-trivial and useful
conditions and proving conditional equivalence formally
in the presence of loops. We combine abstract
interpretation, decision procedures, and testing to
yield a verification strategy that can address both of
these problems. This approach yields a superoptimizer
for x86 that in our experiments produces binaries that
are often multiple times faster than those produced by
production compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
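A toy instance of conditional correctness (ours, not from the paper): in Java, x / 8 may be strength-reduced to x >> 3 only under the precondition x >= 0, because truncating division and arithmetic shift disagree on negative inputs.

    public class ConditionalOpt {
        static int div8Ref(int x)  { return x / 8; }   // original kernel
        static int div8Fast(int x) { return x >> 3; }  // equal only if x >= 0

        public static void main(String[] args) {
            System.out.println(div8Ref(17) == div8Fast(17)); // true: precondition holds
            System.out.println(div8Ref(-9) == div8Fast(-9)); // false: -1 vs -2
        }
    }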
@Article{Blackshear:2015:SCF,
author = "Sam Blackshear and Bor-Yuh Evan Chang and Manu
Sridharan",
title = "Selective control-flow abstraction via jumping",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "163--182",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814293",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present jumping, a form of selective control-flow
abstraction useful for improving the scalability of
goal-directed static analyses. Jumping is useful for
analyzing programs with complex control-flow such as
event-driven systems. In such systems, accounting for
orderings between certain events is important for
precision, yet analyzing the product graph of all
possible event orderings is intractable. Jumping solves
this problem by allowing the analysis to selectively
abstract away control-flow between events irrelevant to
a goal query while preserving information about the
ordering of relevant events. We present a framework for
designing sound jumping analyses and create an
instantiation of the framework for performing precise
inter-event analysis of Android applications. Our
experimental evaluation showed that using jumping to
augment a precise goal-directed analysis with
inter-event reasoning enabled our analysis to prove
90-97\% of dereferences safe across our benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Madhavan:2015:AGC,
author = "Ravichandhran Madhavan and Mika{\"e}l Mayer and Sumit
Gulwani and Viktor Kuncak",
title = "Automating grammar comparison",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "183--200",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814304",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We consider from a practical perspective the problem
of checking equivalence of context-free grammars. We
present techniques for proving equivalence, as well as
techniques for finding counter-examples that establish
non-equivalence. Among the key building blocks of our
approach is a novel algorithm for efficiently
enumerating and sampling words and parse trees from
arbitrary context-free grammars; the algorithm supports
polynomial time random access to words belonging to the
grammar. Furthermore, we propose an algorithm for
proving equivalence of context-free grammars that is
complete for LL grammars, yet can be invoked on any
context-free grammar, including ambiguous grammars. Our
techniques successfully find discrepancies between
different syntax specifications of several real-world
languages, and are capable of detecting fine-grained
incremental modifications performed on grammars. Our
evaluation shows that our tool improves significantly
on existing state-of-the-art tools. In
addition, we used these algorithms to develop an online
tutoring system for grammars that we then used in an
undergraduate course on computer language processing.
On questions involving grammar constructions, our
system was able to automatically evaluate the
correctness of 95\% of the solutions submitted by
students: it disproved 74\% of cases and proved 21\% of
them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
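A small worked example of grammar (non-)equivalence, ours rather than the paper's: take
$G_1$: $S \to a\,S\,b \mid \varepsilon$, so $L(G_1) = \{a^n b^n \mid n \ge 0\}$, and
$G_2$: $S \to a\,S\,b \mid ab$, so $L(G_2) = \{a^n b^n \mid n \ge 1\}$.
The empty word $\varepsilon \in L(G_1) \setminus L(G_2)$ is the shortest counter-example establishing non-equivalence; adding the production $S \to \varepsilon$ to $G_2$ makes the two grammars equivalent.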
@Article{Ntzik:2015:RAP,
author = "Gian Ntzik and Philippa Gardner",
title = "Reasoning about the {POSIX} file system: local update
and global pathnames",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "201--220",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814306",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a program logic for specifying a core
sequential subset of the POSIX file system and for
reasoning abstractly about client programs working with
the file system. The challenge is to reason about the
combination of local directory update and global
pathname traversal (including '..' and symbolic links)
which may overlap the directories being updated.
Existing reasoning techniques are either based on
first-order logic and do not scale, or on separation
logic and can only handle linear pathnames (no '..' or
symbolic links). We introduce fusion logic for
reasoning about local update and global pathname
traversal, introducing a novel effect frame rule to
propagate the effect of a local update on overlapping
pathnames. We apply our reasoning to the standard
recursive remove utility ({\tt rm -r}), discovering
bugs in well-known implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Ou:2015:AAI,
author = "Peizhao Ou and Brian Demsky",
title = "{AutoMO}: automatic inference of memory order
parameters for {C\slash C++11}",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "221--240",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814286",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many concurrent data structures are initially designed
for the sequential consistency (SC) memory model.
Developers often implement these data structures on
real-world systems with weaker memory models by adding
sufficient fences to ensure that their implementation
on the weak memory model exhibits the same executions
as the SC memory model. Recently, the C11 and C++11
standards have added a weak memory model to the C and
C++ languages. Developing and debugging code for weak
memory models can be extremely challenging. We present
AutoMO, a framework to support porting data structures
designed for the SC memory model to the C/C++11 memory
model. AutoMO provides support across the porting
process: (1) it automatically infers initial settings
for the memory order parameters, (2) it detects whether
a C/C++11 execution is equivalent to some SC execution,
and (3) it simplifies traces to make them easier to
understand. We have used AutoMO to successfully infer
memory order parameters for a range of data structures
and to check whether executions of several concurrent
data structure implementations are SC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
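AutoMO targets C/C++11; the sketch below is a hedged Java analogue (VarHandle access modes standing in for memory-order parameters, not AutoMO output). It shows the kind of weakening such inference justifies: for this message-passing idiom, a release store paired with an acquire load preserves the executions of a sequentially consistent (volatile) flag.

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;

    public class MessagePassing {
        static int data;
        static int flag;
        static final VarHandle FLAG;
        static {
            try {
                FLAG = MethodHandles.lookup()
                        .findStaticVarHandle(MessagePassing.class, "flag", int.class);
            } catch (ReflectiveOperationException e) {
                throw new ExceptionInInitializerError(e);
            }
        }

        public static void main(String[] args) throws InterruptedException {
            Thread producer = new Thread(() -> {
                data = 42;             // plain write
                FLAG.setRelease(1);    // release store: publishes data
            });
            Thread consumer = new Thread(() -> {
                while ((int) FLAG.getAcquire() == 0) Thread.onSpinWait();
                System.out.println(data);  // acquire load: guaranteed to see 42
            });
            consumer.start(); producer.start();
            producer.join(); consumer.join();
        }
    }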
@Article{Biswas:2015:VES,
author = "Swarnendu Biswas and Minjia Zhang and Michael D. Bond
and Brandon Lucia",
title = "{Valor}: efficient, software-only region conflict
exceptions",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "241--259",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814292",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data races complicate programming language semantics,
and a data race is often a bug. Existing techniques
detect data races and define their semantics by
detecting conflicts between synchronization-free
regions (SFRs). However, such techniques either modify
hardware or slow programs dramatically, preventing
always-on use today. This paper describes Valor, a
sound, precise, software-only region conflict detection
analysis that achieves high performance by eliminating
the costly analysis on each read operation that prior
approaches require. Valor instead logs a region's reads
and lazily detects conflicts for logged reads when the
region ends. As a comparison, we have also developed
FastRCD, a conflict detector that leverages the epoch
optimization strategy of the FastTrack data race
detector. We evaluate Valor, FastRCD, and FastTrack,
showing that Valor dramatically outperforms FastRCD and
FastTrack. Valor is the first region conflict detector
to provide strong semantic guarantees for racy program
executions with under 2X slowdown. Overall, Valor
advances the state of the art in always-on support for
strong behavioral guarantees for data races.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
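A minimal single-threaded sketch of the lazy read validation idea (simplified far beyond Valor, names ours): each shared variable carries a write version; a region logs the version seen at each read, and only at region end reports a conflict if any logged version has changed.

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.atomic.AtomicInteger;

    public class LazyReadValidation {
        static final Map<String, AtomicInteger> VERSION = new HashMap<>();
        static { VERSION.put("x", new AtomicInteger()); }

        record ReadEntry(String var, int version) {}
        static final List<ReadEntry> LOG = new ArrayList<>();

        static void read(String var) {
            // No conflict check here: just log the observed version.
            LOG.add(new ReadEntry(var, VERSION.get(var).get()));
        }

        static void write(String var) { VERSION.get(var).incrementAndGet(); }

        static void endRegion() {
            // Lazily detect read-write conflicts when the region ends.
            for (ReadEntry e : LOG)
                if (VERSION.get(e.var()).get() != e.version())
                    System.out.println("region conflict on " + e.var());
            LOG.clear();
        }

        public static void main(String[] args) {
            read("x");   // region reads x at version 0
            write("x");  // a conflicting write (same thread here, for brevity)
            endRegion(); // prints: region conflict on x
        }
    }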
@Article{Cohen:2015:AMR,
author = "Nachshon Cohen and Erez Petrank",
title = "Automatic memory reclamation for lock-free data
structures",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "260--279",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814298",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Lock-free data-structures are widely employed in
practice, yet designing lock-free memory reclamation
for them is notoriously difficult. In particular, all
known lock-free reclamation schemes are ``manual'' in
the sense that the developer has to specify when nodes
have been retired and may be reclaimed. Retiring nodes
adequately is non-trivial and often requires the
modification of the original lock-free algorithm. In
this paper we present an automatic lock-free
reclamation scheme for lock-free data-structures in the
spirit of mark-sweep garbage collection. The proposed
algorithm works with any normalized lock-free algorithm
and with no need for the programmer to retire nodes or
make changes to the algorithm. Evaluation of the
proposed scheme on a linked-list and a hash table shows
that it performs similarly to the best manual
(lock-free) memory reclamation scheme.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Lopez:2015:PBV,
author = "Hugo A. L{\'o}pez and Eduardo R. B. Marques and
Francisco Martins and Nicholas Ng and C{\'e}sar Santos
and Vasco Thudichum Vasconcelos and Nobuko Yoshida",
title = "Protocol-based verification of message-passing
parallel programs",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "280--298",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814302",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present ParTypes, a type-based methodology for the
verification of Message Passing Interface (MPI)
programs written in the C programming language. The aim
is to statically verify programs against protocol
specifications, enforcing properties such as fidelity
and absence of deadlocks. We develop a protocol
language based on a dependent type system for
message-passing parallel programs, which includes
various communication operators, such as point-to-point
messages, broadcast, reduce, array scatter and gather.
For the verification of a program against a given
protocol, the protocol is first translated into a
representation read by VCC, a software verifier for C.
We successfully verified several MPI programs in a
running time that is independent of the number of
processes or other input parameters. This contrasts
with alternative techniques, notably model checking and
runtime verification, that suffer from the
state-explosion problem or that otherwise depend on
parameters to the program itself. We experimentally
evaluated our approach against state-of-the-art tools
for MPI and conclude that it offers a scalable
solution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Bastani:2015:IVA,
author = "Osbert Bastani and Saswat Anand and Alex Aiken",
title = "Interactively verifying absence of explicit
information flows in {Android} apps",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "299--315",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814274",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "App stores are increasingly the preferred mechanism
for distributing software, including mobile apps
(Google Play), desktop apps (Mac App Store and Ubuntu
Software Center), computer games (the Steam Store), and
browser extensions (Chrome Web Store). The centralized
nature of these stores has important implications for
security. While app stores have unprecedented ability
to audit apps, users now trust hosted apps, making them
more vulnerable to malware that evades detection and
finds its way onto the app store. Sound static explicit
information flow analysis has the potential to
significantly aid human auditors, but it is handicapped
by high false positive rates. Instead, auditors
currently rely on a combination of dynamic analysis
(which is unsound) and lightweight static analysis
(which cannot identify information flows) to help
detect malicious behaviors. We propose a process for
producing apps certified to be free of malicious
explicit information flows. In practice, imprecision in
the reachability analysis is a major source of false
positive information flows that are difficult to
understand and discharge. In our approach, the
developer provides tests that specify what code is
reachable, allowing the static analysis to restrict its
search to tested code. The app hosted on the store is
instrumented to enforce the provided specification
(i.e., executing untested code terminates the app). We
use abductive inference to minimize the necessary
instrumentation, and then interact with the developer
to ensure that the instrumentation only cuts
unreachable code. We demonstrate the effectiveness of
our approach in verifying a corpus of 77 Android
apps: our interactive verification process successfully
discharges 11 out of the 12 false positives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Brutschy:2015:SGD,
author = "Lucas Brutschy and Pietro Ferrara and Omer Tripp and
Marco Pistoia",
title = "{ShamDroid}: gracefully degrading functionality in the
presence of limited resource access",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "316--331",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814296",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Given a program whose functionality depends on access
to certain external resources, we investigate the
question of how to gracefully degrade functionality
when a subset of those resources is unavailable. The
concrete setting motivating this problem statement is
mobile applications, which rely on contextual data
(e.g., device identifiers, user location and contacts,
etc.) to fulfill their functionality. In particular, we
focus on the Android platform, which mediates access to
resources via an installation-time permission model. On
the one hand, granting an app the permission to access
a resource (e.g., the device ID) entails privacy
threats (e.g., releasing the device ID to advertising
servers). On the other hand, denying access to a
resource could render the app useless (e.g., if
inability to read the device ID is treated as an error
state). Our goal is to specialize an existing Android
app in such a way that it is disabled from accessing
certain sensitive resources (or contextual data) as
specified by the user, while still being able to
execute functionality that does not depend on those
resources. We present ShamDroid, a program
transformation algorithm, based on specialized forms of
program slicing, backwards static analysis and
constraint solving, that enables the use of Android
apps with partial permissions. We rigorously state the
guarantees provided by ShamDroid w.r.t. functionality
maximization. We provide an evaluation over the top 500
Google Play apps and report on an extensive comparative
evaluation of ShamDroid against three other
state-of-the-art solutions (APM, XPrivacy, and Google
App Ops) that mediate resource access at the system
(rather than app) level. ShamDroid performs better than
all of these tools by a significant margin, leading to
abnormal behavior in only 1 out of 27 apps we manually
investigated, compared to the other solutions, which
cause crashes and abnormalities in 9 or more of the
apps. This demonstrates the importance of performing
app-sensitive mocking.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Bielik:2015:SRD,
author = "Pavol Bielik and Veselin Raychev and Martin Vechev",
title = "Scalable race detection for {Android} applications",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "332--348",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814303",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a complete end-to-end dynamic analysis
system for finding data races in mobile Android
applications. The capabilities of our system
significantly exceed the state of the art: our system
can analyze real-world application interactions in
minutes rather than hours, finds errors inherently
beyond the reach of existing approaches, while still
(critically) reporting very few false positives. Our
system is based on three key concepts: (i) a thorough
happens-before model of Android-specific concurrency,
(ii) a scalable analysis algorithm for efficiently
building and querying the happens-before graph, and
(iii) an effective set of domain-specific filters that
reduce the number of reported data races by several
orders of magnitude. We evaluated the usability and
performance of our system on 354 real-world Android
applications (e.g., Facebook). Our system analyzes a
minute of end-user interaction with the application in
about 24 seconds, while current approaches take hours
to complete. Inspecting the results for 8 large
open-source applications revealed 15 harmful bugs of
diverse kinds. Some of the bugs we reported were
confirmed and fixed by developers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Hu:2015:VYL,
author = "Yongjian Hu and Tanzirul Azim and Iulian Neamtiu",
title = "Versatile yet lightweight record-and-replay for
{Android}",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "349--366",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814320",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recording and replaying the execution of smartphone
apps is useful in a variety of contexts, from
reproducing bugs to profiling and testing. Achieving
effective record-and-replay is a balancing act between
accuracy and overhead. On smartphones, the act is
particularly complicated, because smartphone apps
receive a high-bandwidth stream of input (e.g.,
network, GPS, camera, microphone, touchscreen) and
concurrency events, but the stream has to be recorded
and replayed with minimal overhead, to avoid
interfering with app execution. Prior record-and-replay
approaches have focused on replaying machine
instructions or system calls, which is not a good fit
on smartphones. We propose a novel, stream-oriented
record-and-replay approach which achieves high accuracy
and low overhead by aiming at a sweet spot: recording
and replaying sensor and network input, event
schedules, and inter-app communication via intents. To
demonstrate the versatility of our approach, we have
constructed a tool named VALERA that supports
record-and-replay on the Android platform. VALERA works
with apps running directly on the phone, and does not
require access to the app source code. Through an
evaluation on 50 popular Android apps, we show that:
VALERA's replay fidelity far exceeds current
record-and-replay approaches for Android; VALERA's
precise timing control and low overhead (about 1\% for
either record or replay) allows it to replay
high-throughput, timing-sensitive apps such as
video/audio capture and recognition; and VALERA's
support for event schedule replay enables the
construction of useful analyses, such as reproducing
event-driven race bugs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Bender:2015:DFI,
author = "John Bender and Mohsen Lesani and Jens Palsberg",
title = "Declarative fence insertion",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "367--385",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814318",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Previous work has shown how to insert fences that
enforce sequential consistency. However, for many
concurrent algorithms, sequential consistency is
unnecessarily strong and can lead to high execution
overhead. The reason is that, often, correctness relies
on the execution order of a few specific pairs of
instructions. Algorithm designers can declare those
execution orders and thereby enable
memory-model-independent reasoning about correctness
and also ease implementation of algorithms on multiple
platforms. The literature has examples of such
reasoning, while tool support for enforcing the orders
has been lacking until now. In this paper we present a
declarative approach to specify and enforce execution
orders. Our fence insertion algorithm first identifies
the execution orders that a given memory model enforces
automatically, and then inserts fences that enforce the
rest. Our benchmarks include three off-the-shelf
transactional memory algorithms written in C/C++ for
which we specify suitable execution orders. For those
benchmarks, our experiments with the x86 and ARMv7
memory models show that our tool inserts fences that
are competitive with those inserted by the original
authors. Our tool is the first to insert fences into
transactional memory algorithms and it solves the
long-standing problem of how to easily port such
algorithms to a novel memory model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
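A hedged Java analogue of the end result (the paper targets C/C++ and transactional memory algorithms; VarHandle.fullFence merely stands in for a platform fence): a declared store-store order is enforced by an inserted fence on memory models that do not already guarantee it.

    import java.lang.invoke.VarHandle;

    public class DeclaredOrder {
        static int a, b;

        public static void main(String[] args) {
            // Declared order: the write to a must become visible no
            // later than the write to b. Where the target memory model
            // does not enforce store-store order, a fence is inserted.
            a = 1;
            VarHandle.fullFence();  // the inserted fence
            b = 1;
            System.out.println(a + " " + b);
        }
    }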
@Article{Le:2015:FDC,
author = "Vu Le and Chengnian Sun and Zhendong Su",
title = "Finding deep compiler bugs via guided stochastic
program mutation",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "386--399",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814319",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compiler testing is important and challenging.
Equivalence Modulo Inputs (EMI) is a recent promising
approach for compiler validation. It is based on
mutating the unexecuted statements of an existing
program under some inputs to produce new equivalent
test programs w.r.t. these inputs. Orion is a simple
realization of EMI by only randomly deleting unexecuted
statements. Despite its success in finding many bugs in
production compilers, Orion's effectiveness is still
limited by its simple, blind mutation strategy. To more
effectively realize EMI, this paper introduces a
guided, advanced mutation strategy based on Bayesian
optimization. Our goal is to generate diverse programs
to more thoroughly exercise compilers. We achieve this
with two techniques: (1) the support of both code
deletions and insertions in the unexecuted regions,
leading to a much larger test program space; and (2)
the use of an objective function that promotes
control-flow-diverse programs for guiding Markov Chain
Monte Carlo (MCMC) optimization to explore the search
space. Our technique helps discover deep bugs that
require elaborate mutations. Our realization, Athena,
targets C compilers. In 19 months, Athena has found 72
new bugs --- many of which are deep and important bugs
--- in GCC and LLVM. Developers have confirmed all 72
bugs and fixed 68 of them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
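The Equivalence Modulo Inputs idea in miniature (a toy restatement, not Orion or Athena): statements unexecuted under the profiling inputs may be deleted (or new code inserted there), and the mutant must behave identically on those inputs; any observable divergence after compilation exposes a compiler bug.

    import java.util.ArrayList;
    import java.util.List;

    public class EmiMutation {
        record Stmt(String code, boolean executedOnProfile) {}

        // Delete statements the profile shows are unexecuted under
        // the chosen inputs: the mutant is equivalent modulo inputs.
        static List<String> mutate(List<Stmt> program) {
            List<String> mutant = new ArrayList<>();
            for (Stmt s : program)
                if (s.executedOnProfile()) mutant.add(s.code());
            return mutant;
        }

        public static void main(String[] args) {
            List<Stmt> p = List.of(
                new Stmt("x = 1;", true),
                new Stmt("if (x < 0) {", true),
                new Stmt("  x = -x;", false),  // dead under these inputs
                new Stmt("}", true));
            System.out.println(mutate(p));  // must behave like the original
        }
    }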
@Article{Wang:2015:VAR,
author = "Haichuan Wang and David Padua and Peng Wu",
title = "Vectorization of {Apply} to reduce interpretation
overhead of {R}",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "400--415",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814273",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "R is a popular dynamic language designed for
statistical computing. Despite R's huge user base, the
inefficiency of R's language implementation is a
major pain-point in everyday use as well as an obstacle
to applying R to large-scale analytics problems. The
two most common approaches to improving the performance
of dynamic languages are implementing more efficient
interpretation strategies and extending the interpreter
with a Just-In-Time (JIT) compiler. However, both
approaches require significant changes to the
interpreter, and complicate the adoption by development
teams as a result. This paper presents a new approach
to improve execution efficiency of R programs by
vectorizing the widely used Apply class of operations.
Apply accepts two parameters: a function and a
collection of input data elements. The standard
implementation of Apply iteratively invokes the input
function with each element in the data collection. Our
approach combines data transformation and function
vectorization to convert the looping-over-data
execution of the standard Apply into a single
invocation of a vectorized function that contains a
sequence of vector operations over the input data. This
conversion can significantly speed up the execution of
Apply operations in R by reducing the number of
interpretation steps. We implemented the vectorization
transformation as an R package. To enable the
optimization, all that is needed is to invoke the
package, and the user can use a normal R interpreter
without any changes. The evaluation shows that the
proposed method delivers significant performance
improvements for a collection of data analysis
algorithm benchmarks. This is achieved without any
native code generation and using only a single thread
of execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Gvero:2015:SJE,
author = "Tihomir Gvero and Viktor Kuncak",
title = "Synthesizing {Java} expressions from free-form
queries",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "416--432",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814295",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new code assistance tool for integrated
development environments. Our system accepts as input
free-form queries containing a mixture of English and
Java, and produces Java code expressions that take the
query into account and respect syntax, types, and
scoping rules of Java, as well as statistical usage
patterns. In contrast to solutions based on code
search, the results returned by our tool need not
directly correspond to any previously seen code
fragment. As part of our system we have constructed a
probabilistic context-free grammar for Java constructs
and library invocations, as well as an algorithm that
uses a customized natural language processing tool
chain to extract information from free-form text
queries. We present the results on a number of examples
showing that our technique (1) often produces the
expected code fragments, (2) tolerates much of the
flexibility of natural language, and (3) can repair
incorrect Java expressions that use, for example, the
wrong syntax or missing arguments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Zheng:2015:APP,
author = "Yudi Zheng and Lubom{\'\i}r Bulej and Walter Binder",
title = "Accurate profiling in the presence of dynamic
compilation",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "433--450",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814281",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many profilers based on bytecode instrumentation yield
wrong results in the presence of an optimizing dynamic
compiler, either due to not being aware of
optimizations such as stack allocation and method
inlining, or due to the inserted code disrupting such
optimizations. To avoid such perturbations, we present
a novel technique to make any profiler implemented at
the bytecode level aware of optimizations performed by
the dynamic compiler. We implement our approach in a
state-of-the-art Java virtual machine and demonstrate
its significance with concrete profilers. We quantify
the impact of escape analysis on allocation profiling
and object life-time analysis, and the impact of method
inlining on callsite profiling. We illustrate how our
approach enables new kinds of profilers, such as a
profiler for non-inlined callsites, and a testing
framework for locating performance bugs in dynamic
compiler implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Aigner:2015:FMS,
author = "Martin Aigner and Christoph M. Kirsch and Michael
Lippautz and Ana Sokolova",
title = "Fast, multicore-scalable, low-fragmentation memory
allocation through large virtual memory and global data
structures",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "451--469",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814294",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We demonstrate that general-purpose memory allocation
involving many threads on many cores can be done with
high performance, multicore scalability, and low memory
consumption. For this purpose, we have designed and
implemented scalloc, a concurrent allocator that
in our experiments generally performs and scales better
than other allocators while using less memory, and is
still competitive otherwise. The main ideas behind the
design of scalloc are: uniform treatment of small and
big objects through so-called virtual spans,
efficiently and effectively reclaiming free memory
through fast and scalable global data structures, and
constant-time (modulo synchronization) allocation and
deallocation operations that trade off memory reuse and
spatial locality without being subject to false
sharing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Boston:2015:PTI,
author = "Brett Boston and Adrian Sampson and Dan Grossman and
Luis Ceze",
title = "Probability type inference for flexible approximate
programming",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "470--487",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814301",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In approximate computing, programs gain efficiency by
allowing occasional errors. Controlling the
probabilistic effects of this approximation remains a
key challenge. We propose a new approach where
programmers use a type system to communicate high-level
constraints on the degree of approximation. A
combination of type inference, code specialization, and
optional dynamic tracking makes the system expressive
and convenient. The core type system captures the
probability that each operation exhibits an error and
bounds the probability that each expression deviates
from its correct value. Solver-aided type inference
lets the programmer specify the correctness probability
on only some variables (program outputs, for example) and
automatically fills in other types to meet these
specifications. An optional dynamic type helps cope
with complex run-time behavior where static approaches
are insufficient. Together, these features interact to
yield a high degree of programmer control while
offering a strong soundness guarantee. We use existing
approximate-computing benchmarks to show how our
language, DECAF, maintains a low annotation burden. Our
constraint-based approach can encode hardware details,
such as finite degrees of reliability, so we also use
DECAF to examine implications for approximate hardware
design. We find that multi-level architectures can
offer advantages over simpler two-level machines and
that solver-aided optimization improves efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
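To picture the annotation burden being discussed, a hypothetical Java illustration (the Approx annotation below is defined locally for this sketch and is not DECAF syntax): the programmer bounds only the output's correctness probability, and solver-aided inference would fill in probabilities for the intermediate operations.

    import java.lang.annotation.ElementType;
    import java.lang.annotation.Retention;
    import java.lang.annotation.RetentionPolicy;
    import java.lang.annotation.Target;

    public class ApproxDemo {
        // Hypothetical annotation: the result is correct with
        // probability at least p.
        @Retention(RetentionPolicy.RUNTIME)
        @Target({ElementType.FIELD, ElementType.METHOD})
        @interface Approx { double p(); }

        // Only the output is constrained; inference would assign
        // reliabilities to the operations inside to meet the bound.
        @Approx(p = 0.99)
        static double blend(double a, double b) {
            return 0.5 * (a + b);
        }

        public static void main(String[] args) {
            System.out.println(blend(0.2, 0.4));
        }
    }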
@Article{Jantz:2015:CLM,
author = "Michael R. Jantz and Forrest J. Robinson and Prasad A.
Kulkarni and Kshitij A. Doshi",
title = "Cross-layer memory management for managed language
applications",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "488--504",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814322",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Performance and energy efficiency in memory have
become critically important for a wide range of
computing domains. However, it is difficult to control
and optimize memory power and performance because these
effects depend upon activity across multiple layers of
the vertical execution stack. To address this
challenge, we construct a novel and collaborative
framework that employs object placement, cross-layer
communication, and page-level management to effectively
distribute application objects in the DRAM hardware to
achieve desired power/performance goals. In this work,
we describe the design and implementation of our
framework, which is the first to integrate automatic
object profiling and analysis at the application layer
with fine-grained management of memory hardware
resources in the operating system. We demonstrate the
utility of our framework by employing it to more
effectively control memory power consumption. We design
a custom memory-intensive workload to show the
potential of our approach. Next, we develop sampling
and profiling-based analyses and modify the code
generator in the HotSpot VM to understand object usage
patterns and automatically determine and control the
placement of hot and cold objects in a partitioned VM
heap. This information is communicated to the operating
system, which uses it to map the logical application
pages to the appropriate DRAM ranks according to
user-defined provisioning goals. We evaluate our
framework and find that it achieves our test goal of
significant DRAM energy savings across a variety of
workloads, without any source code modifications or
recompilations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Madsen:2015:SAE,
author = "Magnus Madsen and Frank Tip and Ondrej Lhot{\'a}k",
title = "Static analysis of event-driven {Node.js JavaScript}
applications",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "505--519",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814272",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many JavaScript programs are written in an
event-driven style. In particular, in server-side
Node.js applications, operations involving sockets,
streams, and files are typically performed in an
asynchronous manner, where the execution of listeners
is triggered by events. Several types of programming
errors are specific to such event-based programs (e.g.,
unhandled events and listeners that are registered too
late). We present the event-based call graph, a program
representation that can be used to detect bugs related
to event handling. We have designed and implemented
three analyses for constructing event-based call
graphs. Our results show that these analyses are
capable of detecting problems reported on
StackOverflow. Moreover, we show that the number of
false positives reported by the analysis on a suite of
small Node.js applications is manageable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Feng:2015:EQD,
author = "Yu Feng and Xinyu Wang and Isil Dillig and Calvin
Lin",
title = "{EXPLORER} : query- and demand-driven exploration of
interprocedural control flow properties",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "520--534",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814284",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes a general framework and its
implementation in a tool called EXPLORER for statically
answering a class of interprocedural control flow
queries about Java programs. EXPLORER allows users to
formulate queries about feasible callstack
configurations using regular expressions, and it
employs a precise, demand-driven algorithm for
answering such queries. Specifically, EXPLORER
constructs an automaton A that is iteratively refined
until either the language accepted by A is empty
(meaning that the query has been refuted) or until no
further refinement is possible based on a precise,
context-sensitive abstraction of the program. We
evaluate EXPLORER by applying it to three different
program analysis tasks, namely, (1) analysis of the
observer design pattern in Java, (2) identification of
a class of performance bugs, and (3) analysis of
inter-component communication in Android applications.
Our evaluation shows that EXPLORER is both efficient
and precise.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
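To make the query style concrete, a toy Java sketch (hypothetical query; EXPLORER's query language and refinement algorithm are much richer): a callstack configuration is checked against a regular expression over method names.

    import java.util.regex.Pattern;

    public class StackQuery {
        public static void main(String[] args) {
            // Is there a feasible stack that reaches onClick from main,
            // through any sequence of intermediate calls?
            Pattern query = Pattern.compile("main( \\w+)* onClick");
            String stack = "main dispatchEvent onClick";
            System.out.println(query.matcher(stack).matches()); // true
        }
    }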
@Article{Dietrich:2015:GSE,
author = "Jens Dietrich and Nicholas Hollingum and Bernhard
Scholz",
title = "Giga-scale exhaustive points-to analysis for {Java} in
under a minute",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "535--551",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814307",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computing a precise points-to analysis for very large
Java programs remains challenging despite the large
body of research on points-to analysis. Any approach
must solve an underlying dynamic graph reachability
problem, for which the best algorithms have near-cubic
worst-case runtime complexity, and, hence, previous
work does not scale to programs with millions of lines
of code. In this work, we present a novel approach for
solving the field-sensitive points-to problem for Java
with the means of (1) a transitive-closure
data-structure, and (2) a pre-computed set of
potentially matching load/store pairs to accelerate the
fix-point calculation. Experimentation on Java
benchmarks validates the superior performance of our
approach over the standard context-free language
reachability implementations. Our approach computes a
points-to index for the OpenJDK with over 1.5 billion
tuples in under a minute.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
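The transitive-closure ingredient can be pictured with a tiny Java sketch (ours; the paper's index is far more engineered): reachability sets stored as bitsets and unioned to a fixed point.

    import java.util.BitSet;

    public class TransitiveClosure {
        // Naive closure: repeatedly fold successors' reach sets into
        // each node's reach set until nothing changes.
        static BitSet[] close(BitSet[] succ) {
            int n = succ.length;
            BitSet[] reach = new BitSet[n];
            for (int i = 0; i < n; i++) reach[i] = (BitSet) succ[i].clone();
            boolean changed = true;
            while (changed) {
                changed = false;
                for (int i = 0; i < n; i++) {
                    int before = reach[i].cardinality();
                    for (int j = reach[i].nextSetBit(0); j >= 0;
                         j = reach[i].nextSetBit(j + 1))
                        reach[i].or(reach[j]);
                    if (reach[i].cardinality() != before) changed = true;
                }
            }
            return reach;
        }

        public static void main(String[] args) {
            BitSet[] g = { new BitSet(), new BitSet(), new BitSet() };
            g[0].set(1); g[1].set(2);        // edges 0 -> 1 -> 2
            System.out.println(close(g)[0]); // {1, 2}
        }
    }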
@Article{Darais:2015:GTM,
author = "David Darais and Matthew Might and David {Van Horn}",
title = "{Galois} transformers and modular abstract
interpreters: reusable metatheory for program
analysis",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "552--571",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814308",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The design and implementation of static analyzers has
become increasingly systematic. Yet for a given
language or analysis feature, it often requires tedious
and error-prone work to implement an analyzer and prove
it sound. In short, static analysis features and their
proofs of soundness do not compose well, causing a
dearth of reuse in both implementation and metatheory.
We solve the problem of systematically constructing
static analyzers by introducing Galois transformers:
monad transformers that transport Galois connection
properties. In concert with a monadic interpreter, we
define a library of monad transformers that implement
building blocks for classic analysis parameters like
context, path, and heap (in)sensitivity. Moreover,
these can be composed together independent of the
language being analyzed. Significantly, a Galois
transformer can be proved sound once and for all,
making it a reusable analysis component. As new
analysis features and abstractions are developed and
mixed in, soundness proofs need not be reconstructed,
as the composition of a monad transformer stack is
sound by virtue of its constituents. Galois
transformers provide a viable foundation for reusable
and composable metatheory for program analysis.
Finally, these Galois transformers shift the level of
abstraction in analysis design and implementation to a
level where non-specialists have the ability to
synthesize sound analyzers over a number of
parameters.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Oh:2015:LSA,
author = "Hakjoo Oh and Hongseok Yang and Kwangkeun Yi",
title = "Learning a strategy for adapting a program analysis
via {Bayesian} optimisation",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "572--588",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814309",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Building a cost-effective static analyser for
real-world programs is still regarded as an art. One key
contributor to this grim reputation is the difficulty
in balancing the cost and the precision of an analyser.
An ideal analyser should be adaptive to a given
analysis task, and avoid using techniques that
unnecessarily improve precision and increase analysis
cost. However, achieving this ideal is highly
nontrivial, and it requires a large amount of
engineering effort. In this paper we present a new
approach for building an adaptive static analyser. In
our approach, the analyser includes a sophisticated
parameterised strategy that decides, for each part of a
given program, whether to apply a precision-improving
technique to that part or not. We present a method for
learning a good parameter for such a strategy from an
existing codebase via Bayesian optimisation. The learnt
strategy is then used for new, unseen programs. Using
our approach, we developed partially flow- and
context-sensitive variants of a realistic C static
analyser. The experimental results demonstrate that
using Bayesian optimisation is crucial for learning
from an existing codebase. Also, they show that among
all program queries that require flow- or
context-sensitivity, our partially flow- and
context-sensitive analysis answers 75\% of them,
while increasing the analysis cost to only 3.3x that of
the baseline flow- and context-insensitive analysis,
rather than the 40x or more of the fully sensitive version.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Alves:2015:RPD,
author = "P{\'e}ricles Alves and Fabian Gruber and Johannes
Doerfert and Alexandros Lamprineas and Tobias Grosser
and Fabrice Rastello and Fernando Magno Quint{\~a}o
Pereira",
title = "Runtime pointer disambiguation",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "589--606",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814285",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To optimize code effectively, compilers must deal with
memory dependencies. However, the state-of-the-art
heuristics available in the literature to track memory
dependencies are inherently imprecise and
computationally expensive. Consequently, the most
advanced code transformations that compilers have today
are ineffective when applied on real-world programs.
The goal of this paper is to solve this conundrum
through dynamic disambiguation of pointers. We provide
different ways to determine at runtime when two memory
locations can overlap. We then produce two versions of
a code region: one that is aliasing-free --- hence,
easy to optimize --- and another that is not. Our
checks let us safely branch to the optimizable region.
We have applied these ideas on Polly-LLVM, a loop
optimizer built on top of the LLVM compilation
infrastructure. Our experiments indicate that our
method is precise, effective and useful: we can
disambiguate every pair of pointers in the
loop-intensive Polybench benchmark suite. The result of this
precision is code quality: the binaries we generate are
10\% faster than those that Polly-LLVM produces without
our optimization, at the {\tt -O3} optimization level
of LLVM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
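The same trick in miniature, transplanted to Java, where whole-array identity is the analogue of pointer overlap (an analogy only; Polly-LLVM emits range-overlap checks over pointers): guard the aliasing-free, optimizable version of a loop with a cheap runtime test, and fall back to the conservative version otherwise.

    public class RuntimeDisambiguation {
        static void copy(int[] dst, int[] src) {
            if (dst != src) {
                // Aliasing-free region: safe to vectorize aggressively.
                System.arraycopy(src, 0, dst, 0, src.length);
            } else {
                // Conservative fallback when the check fails.
                for (int i = 0; i < src.length; i++) dst[i] = src[i];
            }
        }

        public static void main(String[] args) {
            int[] a = {1, 2, 3};
            int[] b = new int[3];
            copy(b, a);
            System.out.println(b[2]); // 3
        }
    }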
@Article{Toffola:2015:PPY,
author = "Luca Della Toffola and Michael Pradel and Thomas R.
Gross",
title = "Performance problems you can fix: a dynamic analysis
of memoization opportunities",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "607--622",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814290",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Performance bugs are a prevalent problem and recent
research proposes various techniques to identify such
bugs. This paper addresses a kind of performance
problem that often is easy to address but difficult to
identify: redundant computations that may be avoided by
reusing already computed results for particular inputs,
a technique called memoization. To help developers find
and use memoization opportunities, we present
MemoizeIt, a dynamic analysis that identifies methods
that repeatedly perform the same computation. The key
idea is to compare inputs and outputs of method calls
in a scalable yet precise way. To avoid the overhead of
comparing objects at all method invocations in detail,
MemoizeIt first compares objects without following any
references and iteratively increases the depth of
exploration while shrinking the set of considered
methods. After each iteration, the approach ignores
methods that cannot benefit from memoization, allowing
it to analyze calls to the remaining methods in more
detail. For every memoization opportunity that
MemoizeIt detects, it provides hints on how to
implement memoization, making it easy for the developer
to fix the performance issue. Applying MemoizeIt to
eleven real-world Java programs reveals nine profitable
memoization opportunities, most of which are missed by
traditional CPU time profilers, conservative compiler
optimizations, and other existing approaches for
finding performance bugs. Adding memoization as
proposed by MemoizeIt leads to statistically
significant speedups by factors between 1.04x and
12.93x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
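The fix such hints point to is ordinary memoization; a minimal Java sketch (names ours, not MemoizeIt output): cache the result per input and reuse it on repeated calls.

    import java.util.HashMap;
    import java.util.Map;

    public class Memoized {
        static final Map<Integer, Long> CACHE = new HashMap<>();

        // Repeatedly performs the same computation for equal inputs:
        // a memoization opportunity.
        static long squareSum(int n) {
            long s = 0;
            for (int i = 1; i <= n; i++) s += (long) i * i;
            return s;
        }

        // The fix: reuse the already computed result per input.
        static long memoSquareSum(int n) {
            return CACHE.computeIfAbsent(n, Memoized::squareSum);
        }

        public static void main(String[] args) {
            System.out.println(memoSquareSum(1_000_000)); // computed once
            System.out.println(memoSquareSum(1_000_000)); // served from cache
        }
    }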
@Article{Lee:2015:RRA,
author = "Wen-Chuan Lee and Tao Bao and Yunhui Zheng and Xiangyu
Zhang and Keval Vora and Rajiv Gupta",
title = "{RAIVE}: runtime assessment of floating-point
instability by vectorization",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "623--638",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814299",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Floating point representation has limited precision
and inputs to floating point programs may also have
errors. Consequently, during execution, errors are
introduced, propagated, and accumulated, leading to
unreliable outputs. We call this the instability
problem. We propose RAIVE, a technique that identifies
output variations of a floating point execution in the
presence of instability. RAIVE transforms every
floating point value to a vector of multiple values ---
the values added to create the vector are obtained by
introducing artificial errors that are upper bounds of
actual errors. The propagation of artificial errors
models the propagation of actual errors. When values in
vectors result in discrete execution differences (e.g.,
following different paths), the execution is forked to
capture the resulting output variations. Our evaluation
shows that RAIVE can precisely capture output
variations. Its overhead (340\%) is 2.43 times lower
than that of the state of the art.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
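A crude sketch of value vectorization (far simpler than RAIVE, which operates on binaries): carry each floating-point value as a small vector whose extra lanes hold artificial one-ulp errors; a large relative spread among the output lanes flags instability.

    public class InstabilityProbe {
        static double[] sub(double[] a, double[] b) {
            double[] r = new double[a.length];
            for (int i = 0; i < a.length; i++) r[i] = a[i] - b[i];
            return r;
        }

        public static void main(String[] args) {
            double x = 1.0 + 1e-15, y = 1.0;
            // Lane 0 is the nominal value; lanes 1-2 carry one-ulp
            // perturbations in opposite directions.
            double[] xs = { x, Math.nextUp(x), Math.nextDown(x) };
            double[] ys = { y, Math.nextDown(y), Math.nextUp(y) };
            double[] r = sub(xs, ys); // catastrophic cancellation near 1.0
            for (double lane : r) System.out.println(lane);
            // The lanes disagree by a large relative factor: unstable.
        }
    }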
@Article{Fu:2015:ABE,
author = "Zhoulai Fu and Zhaojun Bai and Zhendong Su",
title = "Automated backward error analysis for numerical code",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "639--654",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814317",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Numerical code uses floating-point arithmetic and
necessarily suffers from roundoff and truncation
errors. Error analysis is the process to quantify such
uncertainty in the solution to a problem. Forward error
analysis and backward error analysis are two popular
paradigms of error analysis. Forward error analysis is
more intuitive and has been explored and automated by
the programming languages (PL) community. In contrast,
although backward error analysis is preferred by
numerical analysts and is the foundation for numerical
stability, it is less known and largely unexplored by the PL
community. To fill the gap, this paper presents an
automated backward error analysis for numerical code to
empower both numerical analysts and application
developers. In addition, we use the computed backward
error results to also compute the condition number, an
important quantity recognized by numerical analysts for
measuring how sensitive a function is to changes or
errors in the input. Experimental results on Intel X87
FPU functions and widely-used GNU C Library functions
demonstrate that our analysis is effective at analyzing
the accuracy of floating-point programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
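For a univariate function the notions involved are related by the rule
of thumb that the forward error is roughly the condition number times
the backward error, where the relative condition number is
|x f'(x) / f(x)|. The sketch below estimates that condition number by
finite differences; it illustrates only the quantity the paper computes,
not the paper's automated analysis of binary code.

    import java.util.function.DoubleUnaryOperator;

    class ConditionNumber {
        // Relative condition number |x * f'(x) / f(x)|, with the
        // derivative approximated by a central finite difference.
        static double cond(DoubleUnaryOperator f, double x) {
            double h = Math.max(Math.abs(x), 1.0) * 1e-7;
            double dfdx =
                (f.applyAsDouble(x + h) - f.applyAsDouble(x - h)) / (2 * h);
            return Math.abs(x * dfdx / f.applyAsDouble(x));
        }

        public static void main(String[] args) {
            // log is ill-conditioned near 1: cond(log, x) = 1 / |ln x|.
            System.out.println(cond(Math::log, 1.0001));
        }
    }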
@Article{Voelter:2015:UCL,
author = "Markus Voelter and Arie van Deursen and Bernd Kolb and
Stephan Eberle",
title = "Using C {language} extensions for developing embedded
software: a case study",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "655--674",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814276",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We report on an industrial case study on developing
the embedded software for a smart meter using the C
programming language and domain-specific extensions of
C such as components, physical units, state machines,
registers and interrupts. We find that the extensions
help significantly with managing the complexity of the
software. They improve testability mainly by supporting
hardware-independent testing, as illustrated by low
integration efforts. The extensions also do not incur
significant overhead regarding memory consumption and
performance. Our case study relies on mbeddr, an
extensible version of C. mbeddr, in turn, builds on the
MPS language workbench which supports modular extension
of languages and IDEs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Lopes:2015:HSA,
author = "Cristina V. Lopes and Joel Ossher",
title = "How scale affects structure in {Java} programs",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "675--694",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814300",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many internal software metrics and external quality
attributes of Java programs correlate strongly with
program size. This knowledge has been used pervasively
in quantitative studies of software through practices
such as normalization on size metrics. This paper
reports size-related super- and sublinear effects that
have not been known before. Findings obtained on a very
large collection of Java programs --- 30,911 projects
hosted at Google Code as of Summer 2011 --- unveil how
certain characteristics of programs vary
disproportionately with program size, sometimes even
non-monotonically. Many of the specific parameters of
nonlinear relations are reported. This result gives
further insight into the differences between ``programming
in the small'' and ``programming in the large.'' The
reported findings carry important consequences for OO
software metrics, and software research in general:
metrics that have been known to correlate with size can
now be properly normalized so that all the information
that is left in them is size-independent.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Mastrangelo:2015:UYO,
author = "Luis Mastrangelo and Luca Ponzanelli and Andrea Mocci
and Michele Lanza and Matthias Hauswirth and Nathaniel
Nystrom",
title = "Use at your own risk: the {Java} unsafe {API} in the
wild",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "695--710",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814313",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Java is a safe language. Its runtime environment
provides strong safety guarantees that any Java
application can rely on. Or so we think. We show that
the runtime actually does not provide these
guarantees---for a large fraction of today's Java code.
Unbeknownst to many application developers, the Java
runtime includes a ``backdoor'' that allows expert
library and framework developers to circumvent Java's
safety guarantees. This backdoor is there by design,
and is well known to experts, as it enables them to
write high-performance ``systems-level'' code in Java.
For much the same reasons that safe languages are
preferred over unsafe languages, these powerful---but
unsafe---capabilities in Java should be restricted.
They should be made safe by changing the language, the
runtime system, or the libraries. At the very least,
their use should be restricted. This paper is a step in
that direction. We analyzed 74 GB of compiled Java
code, spread over 86,479 Java archives, to determine
how Java's unsafe capabilities are used in real-world
libraries and applications. We found that 25\% of Java
bytecode archives depend on unsafe third-party Java
code, and thus Java's safety guarantees cannot be
trusted. We identify 14 different usage patterns of
Java's unsafe capabilities, and we provide supporting
evidence for why real-world code needs these
capabilities. Our long-term goal is to provide a
foundation for the design of new language features to
regain safety in Java.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
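One of the usage patterns the study documents is how code obtains the
sun.misc.Unsafe singleton in the first place: the supported factory
rejects application classes, so libraries read the private static field
reflectively. The sketch below shows that well-known idiom together with
a raw off-heap access; it compiles against JDK 8-era APIs and is exactly
the kind of code the paper argues should be restricted.

    import java.lang.reflect.Field;
    import sun.misc.Unsafe;

    class UnsafeAccess {
        // Unsafe.getUnsafe() throws SecurityException for application
        // code, so real-world libraries grab the singleton by reflection.
        static Unsafe load() throws Exception {
            Field f = Unsafe.class.getDeclaredField("theUnsafe");
            f.setAccessible(true);
            return (Unsafe) f.get(null);
        }

        public static void main(String[] args) throws Exception {
            Unsafe u = load();
            long addr = u.allocateMemory(8);   // raw memory, invisible to GC
            u.putLong(addr, 42L);
            System.out.println(u.getLong(addr));
            u.freeMemory(addr);                // manual deallocation required
        }
    }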
@Article{Achour:2015:ACO,
author = "Sara Achour and Martin C. Rinard",
title = "Approximate computation with outlier detection in
{Topaz}",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "711--730",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814314",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Topaz, a new task-based language for
computations that execute on approximate computing
platforms that may occasionally produce arbitrarily
inaccurate results. Topaz maps tasks onto the
approximate hardware and integrates the generated
results into the main computation. To prevent
unacceptably inaccurate task results from corrupting
the main computation, Topaz deploys a novel outlier
detection mechanism that recognizes and precisely
reexecutes outlier tasks. Outlier detection enables
Topaz to work effectively with approximate hardware
platforms that have complex fault characteristics,
including platforms with bit pattern dependent faults
(in which the presence of faults may depend on values
stored in adjacent memory cells). Our experimental
results show that, for our set of benchmark
applications, outlier detection enables Topaz to
deliver acceptably accurate results (less than 1\%
error) on our target approximate hardware platforms.
Depending on the application and the hardware platform,
the overall energy savings range from 5 to 13 percent.
Without outlier detection, only one of the applications
produces acceptably accurate results.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
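Topaz is a language, not a library, but its outlier-detection loop can
be caricatured as a wrapper around an unreliable task: accept the
approximate result when it is plausible, otherwise re-execute precisely.
All interfaces below are invented for illustration.

    import java.util.function.Predicate;
    import java.util.function.Supplier;

    class OutlierGuard {
        // Run the approximate task; if the result looks like an outlier,
        // fall back to a precise re-execution.
        static <T> T run(Supplier<T> approximate, Supplier<T> precise,
                         Predicate<T> plausible) {
            T result = approximate.get();
            return plausible.test(result) ? result : precise.get();
        }

        public static void main(String[] args) {
            double r = run(() -> 1e30,          // wildly wrong approximate run
                           () -> 42.0,          // precise re-execution
                           x -> Math.abs(x) < 1e6);
            System.out.println(r);              // prints 42.0
        }
    }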
@Article{Wickerson:2015:RSP,
author = "John Wickerson and Mark Batty and Bradford M. Beckmann
and Alastair F. Donaldson",
title = "Remote-scope promotion: clarified, rectified, and
verified",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "731--747",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814283",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern accelerator programming frameworks, such as
OpenCL, organise threads into work-groups. Remote-scope
promotion (RSP) is a language extension recently
proposed by AMD researchers that is designed to enable
applications, for the first time, both to optimise for
the common case of intra-work-group communication
(using memory scopes to provide consistency only within
a work-group) and to allow occasional inter-work-group
communication (as required, for instance, to support
the popular load-balancing idiom of work stealing). We
present the first formal, axiomatic memory model of
OpenCL extended with RSP. We have extended the Herd
memory model simulator with support for OpenCL kernels
that exploit RSP, and used it to discover bugs in
several litmus tests and a work-stealing queue that
have been used previously in the study of RSP. We have
also formalised the proposed GPU implementation of RSP.
The formalisation process allowed us to identify bugs
in the description of RSP that could result in
well-synchronised programs experiencing memory
inconsistencies. We present and prove sound a new
implementation of RSP that incorporates bug fixes and
requires less non-standard hardware than the original
implementation. This work, a collaboration between
academia and industry, clearly demonstrates how, when
designing hardware support for a new concurrent
language feature, the early application of formal tools
and techniques can help to prevent errors, such as
those we have found, from making it into silicon.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Hammer:2015:ICN,
author = "Matthew A. Hammer and Joshua Dunfield and Kyle Headley
and Nicholas Labich and Jeffrey S. Foster and Michael
Hicks and David {Van Horn}",
title = "Incremental computation with names",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "748--766",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814305",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the past thirty years, there has been significant
progress in developing general-purpose, language-based
approaches to incremental computation, which aims to
efficiently update the result of a computation when an
input is changed. A key design challenge in such
approaches is how to provide efficient incremental
support for a broad range of programs. In this paper,
we argue that first-class names are a critical
linguistic feature for efficient incremental
computation. Names identify computations to be reused
across differing runs of a program, and making them
first class gives programmers a high level of control
over reuse. We demonstrate the benefits of names by
presenting Nominal Adapton, an ML-like language for
incremental computation with names. We describe how to
use Nominal Adapton to efficiently incrementalize
several standard programming patterns---including maps,
folds, and unfolds---and show how to build efficient,
incremental probabilistic trees and tries. Since
Nominal Adapton's implementation is subtle, we
formalize it as a core calculus and prove it is
from-scratch consistent, meaning it always produces the
same answer as simply re-running the computation.
Finally, we demonstrate that Nominal Adapton can
provide large speedups over both from-scratch
computation and Adapton, a previous state-of-the-art
incremental computation system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
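A rough Java analogy for first-class names: a memo table keyed by an
explicit name rather than by the input value, so that a changed input
overwrites and reuses the slot of the previous run instead of allocating
a fresh entry. This sketch is loose and its API invented; Nominal
Adapton additionally records dependencies so that change propagation can
reuse unaffected subcomputations.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.function.Function;

    class NamedMemo<K, V> {
        private static final class Slot<K, V> {
            final K input; final V output;
            Slot(K input, V output) { this.input = input; this.output = output; }
        }
        private final Map<String, Slot<K, V>> slots = new HashMap<>();

        // Reuse the result stored under `name` if the input is unchanged;
        // otherwise recompute and overwrite that slot in place.
        V get(String name, K input, Function<K, V> compute) {
            Slot<K, V> s = slots.get(name);
            if (s != null && s.input.equals(input)) return s.output;
            V out = compute.apply(input);
            slots.put(name, new Slot<>(input, out));
            return out;
        }
    }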
@Article{Felgentreff:2015:CBC,
author = "Tim Felgentreff and Todd Millstein and Alan Borning
and Robert Hirschfeld",
title = "Checks and balances: constraint solving without
surprises in object-constraint programming languages",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "767--782",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814311",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Object-constraint programming systems integrate
declarative constraint solving with imperative,
object-oriented languages, seamlessly providing the
power of both paradigms. However, experience with
object-constraint systems has shown that giving too
much power to the constraint solver opens up the
potential for solutions that are surprising and
unintended as well as for complex interactions between
constraints and imperative code. On the other hand,
systems that overly limit the power of the solver, for
example by disallowing constraints involving mutable
objects, object identity, or polymorphic message sends,
run the risk of excluding the core object-oriented
features of the language from the constraint part, and
consequently not being able to express declaratively a
large set of interesting problem solutions. In this
paper we present design principles that tame the power
of the constraint solver in object-constraint languages
to avoid difficult corner cases and surprising
solutions while retaining the key features of the
approach, including constraints over mutable objects,
constraints involving object identity, and constraints
on the results of message sends. We present our
solution concretely in the context of the Babelsberg
object-constraint language framework, providing both an
informal description of the resulting language and a
formal semantics for a core subset of it. We validate
the utility of this semantics with an executable
version that allows us to run test programs and to
verify that they provide the same results as existing
implementations of Babelsberg in JavaScript, Ruby, and
Smalltalk.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Steindorfer:2015:OHA,
author = "Michael J. Steindorfer and Jurgen J. Vinju",
title = "Optimizing hash-array mapped tries for fast and lean
immutable {JVM} collections",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "783--800",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814312",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The data structures under-pinning collection API (e.g.
lists, sets, maps) in the standard libraries of
programming languages are used intensively in many
applications. The standard libraries of recent Java
Virtual Machine languages, such as Clojure or Scala,
contain scalable and well-performing immutable
collection data structures that are implemented as
Hash-Array Mapped Tries (HAMTs). HAMTs already feature
efficient lookup, insert, and delete operations;
however, due to their tree-based nature, their memory
footprints and the runtime performance of iteration and
equality checking lag behind array-based counterparts.
This particularly prohibits their application in
programs which process larger data sets. In this paper,
we propose changes to the HAMT design that increase the
overall performance of immutable sets and maps. The
resulting general purpose design increases cache
locality and features a canonical representation. It
outperforms Scala's and Clojure's data structure
implementations in terms of memory footprint and
runtime efficiency of iteration (1.3--6.7x) and equality
checking (3--25.4x).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
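The indexing trick at the heart of a HAMT node, which the paper's design
refines: take five hash bits per trie level, test the corresponding bit
in a 32-bit bitmap, and use Integer.bitCount to find the slot in the
compressed array. The node layout below is deliberately simplified (a
set with a single slot array); CHAMP, the paper's design, additionally
separates payload from sub-nodes to gain locality.

    class HamtNode {
        int bitmap;        // which of the 32 logical positions are occupied
        Object[] slots;    // compressed: only occupied positions are stored

        // Look up `key` at trie depth `shift` (0, 5, 10, ...).
        Object find(Object key, int hash, int shift) {
            int bit = 1 << ((hash >>> shift) & 31);        // logical position
            if ((bitmap & bit) == 0) return null;          // position is empty
            int idx = Integer.bitCount(bitmap & (bit - 1)); // compressed index
            Object slot = slots[idx];
            if (slot instanceof HamtNode)
                return ((HamtNode) slot).find(key, hash, shift + 5);
            return key.equals(slot) ? slot : null;         // leaf comparison
        }
    }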
@Article{Ureche:2015:AAH,
author = "Vlad Ureche and Aggelos Biboudis and Yannis
Smaragdakis and Martin Odersky",
title = "Automating ad hoc data representation
transformations",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "801--820",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814271",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To maximize run-time performance, programmers often
specialize their code by hand, replacing library
collections and containers by custom objects in which
data is restructured for efficient access. However,
changing the data representation is a tedious and
error-prone process that makes it hard to test,
maintain and evolve the source code. We present an
automated and composable mechanism that allows
programmers to safely change the data representation in
delimited scopes containing anything from expressions
to entire class definitions. To achieve this,
programmers define a transformation and our mechanism
automatically and transparently applies it during
compilation, eliminating the need to manually change
the source code. Our technique leverages the type
system in order to offer correctness guarantees on the
transformation and its interaction with object-oriented
language features, such as dynamic dispatch,
inheritance and generics. We have embedded this
technique in a Scala compiler plugin and used it in
four very different transformations, ranging from
improving the data layout and encoding, to retrofitting
specialization and value class status, and all the way
to collection deforestation. On our benchmarks, the
technique obtained speedups between 1.8x and 24.5x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Marr:2015:TVP,
author = "Stefan Marr and St{\'e}phane Ducasse",
title = "Tracing vs. partial evaluation: comparing
meta-compilation approaches for self-optimizing
interpreters",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "821--839",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814275",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Tracing and partial evaluation have been proposed as
meta-compilation techniques for interpreters to make
just-in-time compilation language-independent. They
promise that programs executing on simple interpreters
can reach performance of the same order of magnitude as
if they were executed on state-of-the-art virtual
machines with highly optimizing just-in-time compilers
built for a specific language. Tracing and partial
evaluation approach this meta-compilation from two ends
of a spectrum, resulting in different sets of
tradeoffs. This study investigates both approaches in
the context of self-optimizing interpreters, a
technique for building fast abstract-syntax-tree
interpreters. Based on RPython for tracing and Truffle
for partial evaluation, we assess the two approaches by
comparing the impact of various optimizations on the
performance of an interpreter for SOM, an
object-oriented dynamically-typed language. The goal is
to determine whether either approach yields clear
performance or engineering benefits. We find that
tracing and partial evaluation both reach roughly the
same level of performance. SOM based on meta-tracing is
on average 3x slower than Java, while SOM based on
partial evaluation is on average 2.3x slower than Java.
With respect to engineering, however, tracing has
significant benefits, because it requires language
implementers to apply fewer optimizations to reach the
same level of performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Upadhyaya:2015:EML,
author = "Ganesha Upadhyaya and Hridesh Rajan",
title = "Effectively mapping linguistic abstractions for
message-passing concurrency to threads on the {Java
Virtual Machine}",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "840--859",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814289",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Efficient mapping of message passing concurrency (MPC)
abstractions to Java Virtual Machine (JVM) threads is
critical for performance, scalability, and CPU
utilization, but tedious and time-consuming to perform
manually. In general, this mapping cannot be found in
polynomial time, but we show that by exploiting the
local characteristics of MPC abstractions and their
communication patterns this mapping can be determined
effectively. We describe our MPC abstraction to thread
mapping technique, its realization in two frameworks
(Panini and Akka), and its rigorous evaluation using
several benchmarks from representative MPC frameworks.
We also compare our technique against four default
mapping techniques: thread-all, round-robin-task-all,
random-task-all and work-stealing. Our evaluation shows
that our mapping technique can improve the performance
by 30\%--60\% over default mapping techniques. These
improvements are due to a number of challenges
addressed by our technique namely: (i) balancing the
computations across JVM threads, (ii) reducing the
communication overheads, (iii) utilizing information
about cache locality, and (iv) mapping MPC abstractions
to threads in a way that reduces the contention between
JVM threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
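The simplest of the baselines the paper compares against can be written
down directly: pin each new message-passing entity to a worker thread in
round-robin order. The sketch below shows only that baseline; the
paper's technique instead weighs each abstraction's computation and
communication characteristics.

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.atomic.AtomicInteger;

    class RoundRobinMapper {
        private final ExecutorService[] workers;
        private final AtomicInteger next = new AtomicInteger();

        RoundRobinMapper(int nThreads) {
            workers = new ExecutorService[nThreads];
            for (int i = 0; i < nThreads; i++)
                workers[i] = Executors.newSingleThreadExecutor();
        }

        // Pin each newly created actor to one worker thread, round-robin.
        // floorMod keeps the index valid if the counter overflows.
        ExecutorService assign() {
            return workers[Math.floorMod(next.getAndIncrement(), workers.length)];
        }
    }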
@Article{Srinivasan:2015:PEM,
author = "Venkatesh Srinivasan and Thomas Reps",
title = "Partial evaluation of machine code",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "860--879",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814321",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents an algorithm for off-line partial
evaluation of machine code. The algorithm follows the
classical two-phase approach of binding-time analysis
(BTA) followed by specialization. However, machine-code
partial evaluation presents a number of new challenges,
and it was necessary to devise new techniques for use
in each phase. --- Our BTA algorithm makes use of an
instruction-rewriting method that ``decouples''
multiple updates performed by a single instruction.
This method counters the cascading imprecision that
would otherwise occur with a more naive approach to
BTA. --- Our specializer specializes an explicit
representation of the semantics of an instruction, and
emits residual code via machine-code synthesis.
Moreover, to create code that allows the stack and heap
to be at different positions at run-time than at
specialization-time, the specializer represents
specialization-time addresses using symbolic constants,
and uses a symbolic state for specialization. Our
experiments show that our algorithm can be used to
specialize binaries with respect to commonly used
inputs to produce faster binaries, as well as to
extract an executable component from a bloated
binary.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
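The classical picture of partial evaluation, shown here at source level
since the paper's machine-code setting is hard to excerpt: given the
static part of the input, the specializer emits a residual program in
which all computation depending only on that part has already been
performed.

    class PowerSpecializer {
        // General program: both x and n are dynamic inputs.
        static double power(double x, int n) {
            double r = 1;
            for (int i = 0; i < n; i++) r *= x;
            return r;
        }

        // Residual program after specializing on the static input n = 3:
        // the loop control has been evaluated away; only x remains.
        static double power3(double x) {
            return x * x * x;
        }
    }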
@Article{Erdweg:2015:CCF,
author = "Sebastian Erdweg and Oliver Bracevac and Edlira Kuci
and Matthias Krebs and Mira Mezini",
title = "A co-contextual formulation of type rules and its
application to incremental type checking",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "880--897",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814277",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Type rules associate types to expressions given a
typing context. As the type checker traverses the
expression tree top-down, it extends the typing context
with additional context information that becomes
available. This way, the typing context coordinates
type checking in otherwise independent subexpressions,
which inhibits parallelization and incrementalization
of type checking. We propose a co-contextual
formulation of type rules that only take an expression
as input and produce a type and a set of context
requirements. Co-contextual type checkers traverse an
expression tree bottom-up and merge context
requirements of independently checked subexpressions.
We describe a method for systematically constructing a
co-contextual formulation of type rules from a regular
context-based formulation and we show how co-contextual
type rules give rise to incremental type checking.
Using our method, we derive incremental type checkers
for PCF and for extensions that introduce records,
parametric polymorphism, and subtyping. Our performance
evaluation shows that co-contextual type checking has
performance comparable to standard context-based type
checking, and incrementalization can improve
performance significantly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
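A toy rendering of the co-contextual idea: checking proceeds bottom-up,
each subexpression yields a type plus the requirements it places on the
still-unknown context, and the requirements of sibling subexpressions
are merged. The sketch hard-codes integer addition and uses concrete
types where the paper would introduce unification variables; every name
is invented.

    import java.util.HashMap;
    import java.util.Map;

    class CoContextual {
        static final class Result {
            final String type;                      // type of the subexpression
            final Map<String, String> requirements; // demands on the context
            Result(String type, Map<String, String> req) {
                this.type = type; this.requirements = req;
            }
        }

        // A variable occurrence looks nothing up; it emits a requirement
        // that the context eventually supply its type.
        static Result var(String name, String assumedType) {
            Map<String, String> req = new HashMap<>();
            req.put(name, assumedType);
            return new Result(assumedType, req);
        }

        // Addition merges the requirements of independently checked
        // operands; clashing demands on one variable are a type error.
        static Result add(Result left, Result right) {
            Map<String, String> merged = new HashMap<>(left.requirements);
            right.requirements.forEach((v, t) -> {
                String prev = merged.putIfAbsent(v, t);
                if (prev != null && !prev.equals(t))
                    throw new IllegalStateException("conflicting types for " + v);
            });
            return new Result("Int", merged);
        }
    }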
@Article{Brandauer:2015:DDF,
author = "Stephan Brandauer and Dave Clarke and Tobias
Wrigstad",
title = "Disjointness domains for fine-grained aliasing",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "898--916",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814280",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Aliasing is crucial for supporting useful
implementation patterns, but it makes reasoning about
programs difficult. To deal with this problem, numerous
type-based aliasing control mechanisms have been
proposed, expressing properties such as uniqueness.
Uniqueness, however, is black-and-white: either a
reference is unique or it can be arbitrarily aliased;
and global: excluding aliases throughout the entire
system, making code brittle to changing requirements.
Disjointness domains, a new approach to alias control,
address this problem by enabling more graduations
between uniqueness and arbitrary reference sharing.
They allow expressing aliasing constraints local to a
certain set of variables (either stack variables or
fields): for instance, that no aliasing occurs between
variables within some set but only between such sets,
or the opposite, that aliasing occurs within a set but
not between different sets. A hierarchy of
disjointness domains controls the flow of references
through a program, helping the programmer reason about
disjointness and enforce local alias invariants. The
resulting system supports fine-grained control of
aliasing between both variables and objects, making
aliasing explicit to programmers, compilers, and
tooling. This paper presents a formal account of
disjointness domains along with examples. Disjointness
domains provide novel means of expressing may-alias
kinds of constraints, which may prove useful in
compiler optimisation and verification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Crafa:2015:CAT,
author = "Silvia Crafa and Luca Padovani",
title = "The chemical approach to typestate-oriented
programming",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "917--934",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814287",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study a novel approach to typestate-oriented
programming based on the chemical metaphor: state and
operations on objects are molecules of messages and
state transformations are chemical reactions. This
approach allows us to investigate typestate in an
inherently concurrent setting, whereby objects can be
accessed and modified concurrently by several
processes, each potentially changing only part of their
state. We introduce a simple behavioral type theory to
express in a uniform way both the private and the
public interfaces of objects, to describe and enforce
structured object protocols consisting of
possibilities, prohibitions, and obligations, and to
control object sharing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Toro:2015:CGP,
author = "Mat{\'\i}as Toro and {\'E}ric Tanter",
title = "Customizable gradual polymorphic effects for {Scala}",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "935--953",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814315",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite their obvious advantages in terms of static
reasoning, effect systems are still only rarely
adopted in practice. Recent advances such as
generic effect systems, lightweight effect
polymorphism, and gradual effect checking, all
represent promising steps towards making effect systems
suitable for widespread use. However, no existing
system combines these approaches: the theory of gradual
polymorphic effects has not been developed, and there
are no implementations of gradual effect checking. In
addition, a limiting factor in the adoption of effect
systems is their unsuitability for localized and
customized effect disciplines. This paper addresses
these issues by presenting the first implementation of
gradual effect checking, for Scala, which supports both
effect polymorphism and a domain-specific language
called Effscript to declaratively define and customize
effect disciplines. We report on the theory,
implementation, and practical application of the
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Kim:2015:CPM,
author = "Sang-Hoon Kim and Sejun Kwon and Jin-Soo Kim and
Jinkyu Jeong",
title = "Controlling physical memory fragmentation in mobile
systems",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "1--14",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754179",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Since the adoption of hardware-accelerated features
(e.g., hardware codec) improves the performance and
quality of mobile devices, it revives the need for
contiguous memory allocation. However, physical memory
in mobile systems is highly fragmented due to the
frequent spawn and exit of processes and the lack of
proactive anti-fragmentation scheme. As a result, the
memory allocation for large and contiguous I/O buffers
suffer from the highly fragmented memory, thereby
incurring high CPU usage and power consumption. This
paper presents a proactive anti-fragmentation approach
that groups pages with the same lifetime, and stores
them contiguously in fixed-size contiguous regions.
When a process is killed to secure free memory, a set
of contiguous regions are freed and subsequent
contiguous memory allocations can be easily satisfied
without incurring additional overhead. Our prototype
implementation on a Nexus 10 tablet with the Android
kernel shows that the proposed scheme greatly
alleviates fragmentation, thereby reducing the I/O
buffer allocation time, associated CPU usage, and
energy consumption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Hussein:2015:DRM,
author = "Ahmed Hussein and Antony L. Hosking and Mathias Payer
and Christopher A. Vick",
title = "Don't race the memory bus: taming the {GC} leadfoot",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "15--27",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754182",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic voltage and frequency scaling (DVFS) is
ubiquitous on mobile devices as a mechanism for saving
energy. Reducing the clock frequency of a processor
allows a corresponding reduction in power consumption,
as does turning off idle cores. Garbage collection is a
canonical example of the sort of memory-bound workload
that best responds to such scaling. Here, we explore
the impact of frequency scaling for garbage collection
in a real mobile device running Android's Dalvik
virtual machine, which uses a concurrent collector. By
controlling the frequency of the core on which the
concurrent collector thread runs we can reduce power
significantly. Running established multi-threaded
benchmarks shows that total processor energy can be
reduced up to 30\%, with end-to-end performance loss of
at most 10\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Cohen:2015:DSA,
author = "Nachshon Cohen and Erez Petrank",
title = "Data structure aware garbage collector",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "28--40",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754176",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Garbage collection may benefit greatly from knowledge
about program behavior, but most managed languages do
not provide means for the programmer to deliver such
knowledge. In this work we propose a very simple
interface that requires minor programmer effort and
achieves substantial performance and scalability
improvements. In particular, we focus on the common use
of data structures or collections for organizing data
on the heap. We let the program notify the collector
which classes represent nodes of data structures and
also when such nodes are being removed from their data
structures. The data-structure aware (DSA) garbage
collector uses this information to improve performance,
locality, and load balancing. Experience shows that
this interface requires a minor modification of the
application. Measurements show that for some
significant benchmarks this interface can dramatically
reduce the time spent on garbage collection and also
improve the overall program performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
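The interface the paper proposes is deliberately small: tell the
collector which classes are data-structure nodes, and tell it when a
node is unlinked. The Java rendering below is hypothetical (both the
interface and the names are invented; the actual work modifies the JVM's
collector), but it conveys how little the application must change.

    // Hypothetical programmer-facing interface in the spirit of the paper.
    interface DsaGc {
        // Declare that instances of `nodeClass` are data-structure nodes.
        void registerNodeClass(Class<?> nodeClass);

        // Notify the collector that `node` was unlinked from its data
        // structure and is therefore likely to be garbage.
        void nodeRemoved(Object node);
    }

    class IntrusiveList {
        static final class Node { Object value; Node next; }
        private Node head;

        // Assumes a non-empty list; the hint is the only extra line.
        Object removeFirst(DsaGc gc) {
            Node n = head;
            head = n.next;
            gc.nodeRemoved(n);   // this node just left its structure
            return n.value;
        }
    }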
@Article{Kuszmaul:2015:SSF,
author = "Bradley C. Kuszmaul",
title = "{SuperMalloc}: a super fast multithreaded {\tt malloc}
for 64-bit machines",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "41--55",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754178",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "SuperMalloc is an implementation of malloc(3)
originally designed for X86 Hardware Transactional
Memory (HTM)@. It turns out that the same design
decisions also make it fast even without HTM@. For the
malloc-test benchmark, which is one of the most
difficult workloads for an allocator, with one thread
SuperMalloc is about 2.1 times faster than the best of
DLmalloc, JEmalloc, Hoard, and TBBmalloc; with 8
threads and HTM, SuperMalloc is 2.75 times faster; and
on 32 threads without HTM SuperMalloc is 3.4 times
faster. SuperMalloc generally compares favorably with
the other allocators on speed, scalability, speed
variance, memory footprint, and code size. SuperMalloc
achieves these performance advantages using less than
half as much code as the alternatives. SuperMalloc
exploits the fact that although physical memory is
always precious, virtual address space on a 64-bit
machine is relatively cheap. It allocates 2 MiB chunks
which contain objects all of the same size. To translate
chunk numbers to chunk metadata, SuperMalloc uses a
simple array (most of which is uncommitted to physical
memory). SuperMalloc takes care to avoid associativity
conflicts in the cache: most of the size classes are a
prime number of cache lines, and nonaligned huge
accesses are randomly aligned within a page. Objects
are allocated from the fullest non-full page in the
appropriate size class. For each size class,
SuperMalloc employs a 10-object per-thread cache, a
per-CPU cache that holds about a level-2-cache worth of
objects per size class, and a global cache that is
organized to allow the movement of many objects between
a per-CPU cache and the global cache using $ O(1) $
instructions. SuperMalloc prefetches everything it can
before starting a critical section, which makes the
critical sections run fast, and for HTM improves the
odds that the transaction will commit.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Osterlund:2015:CCU,
author = "Erik {\"O}sterlund and Welf L{\"o}we",
title = "Concurrent compaction using a field pinning protocol",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "56--69",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754177",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compaction of memory in long running systems has
always been important. The latency of compaction
increases in today's systems with high memory demands
and large heaps. To deal with this problem, we present
a lock-free protocol allowing for copying concurrent
with the application running, which reduces the
latencies of compaction radically. It provides
theoretical progress guarantees for copying and
application threads while remaining practical, with
performance overheads of 15\% on
average. The algorithm paves the way for a future
lock-free Garbage Collector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Lin:2015:SGU,
author = "Yi Lin and Kunshan Wang and Stephen M. Blackburn and
Antony L. Hosking and Michael Norrish",
title = "Stop and go: understanding yieldpoint behavior",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "70--80",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754187",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Yieldpoints are critical to the implementation of high
performance garbage collected languages, yet the design
space is not well understood. Yieldpoints allow a
running program to be interrupted at well-defined
points in its execution, facilitating exact garbage
collection, biased locking, on-stack replacement,
profiling, and other important virtual machine
behaviors. In this paper we identify and evaluate
yieldpoint design choices, including previously
undocumented designs and optimizations. One of the
designs we identify opens new opportunities for very
low overhead profiling. We measure the frequency with
which yieldpoints are executed and establish a
methodology for evaluating the common case execution
time overhead. We also measure the median and worst
case time-to-yield. We find that Java benchmarks
execute about 100M yieldpoints per second, of which
about 1/20000 are taken. The average execution time
overhead for untaken yieldpoints on the VM we use
ranges from 2.5\% to close to zero on modern hardware,
depending on the design, and we find that the designs
trade off total overhead with worst case time-to-yield.
This analysis gives new insight into a critical but
overlooked aspect of garbage collector implementation,
and identifies a new optimization and new opportunities
for very low overhead profiling.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
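In its simplest form a yieldpoint is a poll of a flag at loop back-edges
and method prologues; the VM sets the flag when it needs threads to stop
at a well-defined point. The Java-level caricature below shows the
polling design the paper measures; production VMs compile the check down
to a load-and-branch, or to a guarded page access that traps instead of
branching.

    class Yieldpoints {
        // Set by the runtime when all threads must reach a safepoint
        // (for GC, biased locking, on-stack replacement, profiling, ...).
        static volatile boolean yieldRequested = false;

        static void yieldpoint() {
            if (yieldRequested) {       // untaken in the common case
                park();                 // taken: cooperate with the runtime
            }
        }

        static void park() { /* block until the runtime releases us */ }

        static long sumTo(long n) {
            long sum = 0;
            for (long i = 0; i < n; i++) {
                sum += i;
                yieldpoint();           // poll on the loop back-edge
            }
            return sum;
        }
    }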
@Article{Stancu:2015:SEH,
author = "Codrut Stancu and Christian Wimmer and Stefan
Brunthaler and Per Larsen and Michael Franz",
title = "Safe and efficient hybrid memory management for
{Java}",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "81--92",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754185",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Java uses automatic memory management, usually
implemented as a garbage-collected heap. That lifts the
burden of manually allocating and deallocating memory,
but it can incur significant runtime overhead and
increase the memory footprint of applications. We
propose a hybrid memory management scheme that utilizes
region-based memory management to deallocate objects
automatically on region exits. Static program analysis
detects allocation sites that are safe for region
allocation, i.e., the static analysis proves that the
objects allocated at such a site are not reachable
after the region exit. A regular garbage-collected heap
is used for objects that are not region allocatable.
The region allocation exploits the temporal locality of
object allocation. Our analysis uses coarse-grain
source code annotations to disambiguate objects with
non-overlapping lifetimes, and maps them to different
memory scopes. Region-allocated memory does not require
garbage collection as the regions are simply
deallocated when they go out of scope. The region
allocation technique is backed by a garbage collector
that manages memory that is not region allocated. We
provide a detailed description of the analysis, provide
experimental results showing that as much as 78\% of
the memory is region allocatable and discuss how our
hybrid memory management system can be implemented
efficiently with respect to both space and time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Miranda:2015:PRB,
author = "Eliot Miranda and Cl{\'e}ment B{\'e}ra",
title = "A partial read barrier for efficient support of live
object-oriented programming",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "93--104",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754186",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Live programming, originally introduced by Smalltalk
and Lisp, and now gaining popularity in contemporary
systems such as Swift, requires on-the-fly support for
object schema migration, such that the layout of
objects may be changed while the program is at one and
the same time being run and developed. In Smalltalk
schema migration is supported by two primitives, one
that answers a collection of all instances of a class,
and one that exchanges the identities of pairs of
objects, called the become primitive. Existing
instances are collected, copies using the new schema
created, state copied from old to new, and the two
exchanged with become, effecting the schema migration.
Historically the implementation of become has either
required an extra level of indirection between an
object's address and its body, slowing down slot
access, or has required a sweep of all objects, a very
slow operation on large heaps. Spur, a new object
representation and memory manager for Smalltalk-like
languages, has neither of these deficiencies. It uses
direct pointers but still provides a fast become
operation in large heaps, thanks to forwarding objects
that when read conceptually answer another object and a
partial read barrier that avoids the cost of explicitly
checking for forwarding objects on the vast majority of
object accesses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Clifford:2015:MMD,
author = "Daniel Clifford and Hannes Payer and Michael Stanton
and Ben L. Titzer",
title = "Memento mori: dynamic allocation-site-based
optimizations",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "105--117",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754181",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Languages that lack static typing are ubiquitous in
the world of mobile and web applications. The rapid
rise of larger applications like interactive web GUIs,
games, and cryptography presents a new range of
implementation challenges for modern virtual machines
to close the performance gap between typed and untyped
languages. While all languages can benefit from
efficient automatic memory management, languages like
JavaScript present extra thrills with innocent-looking
but difficult features like dynamically-sized arrays,
deletable properties, and prototypes. Optimizing such
languages requires complex dynamic techniques with more
radical object layout strategies such as dynamically
evolving representations for arrays. This paper
presents a general approach for gathering temporal
allocation site feedback that tackles both the general
problem of object lifetime estimation and improves
optimization of these problematic language features. We
introduce a new implementation technique where
allocation mementos processed by the garbage collector
and runtime system efficiently tie objects back to
allocation sites in the program and dynamically
estimate object lifetime, representation, and size to
inform three optimizations: pretenuring,
pretransitioning, and presizing. Unlike previous work
on pretenuring, our system utilizes allocation mementos
to achieve fully dynamic allocation-site-based
pretenuring in a production system. We implement all of
our techniques in V8, a high performance virtual
machine for JavaScript, and demonstrate solid
performance improvements across a range of
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Shidal:2015:RTC,
author = "Jonathan Shidal and Ari J. Spilo and Paul T. Scheid
and Ron K. Cytron and Krishna M. Kavi",
title = "Recycling trash in cache",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "118--130",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754183",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The disparity between processing and storage speeds
can be bridged in part by reducing the traffic into and
out of the slower memory components. Some recent
studies reduce such traffic by determining dead data in
cache, showing that a significant fraction of writes
can be squashed before they make the trip toward slower
memory. In this paper, we examine a technique for
eliminating traffic in the other direction,
specifically the traffic induced by dynamic storage
allocation. We consider recycling dead storage in cache
to satisfy a program's storage-allocation requests. We
first evaluate the potential for recycling under
favorable circumstances, where the associated logic can
run at full speed with no impact on the cache's normal
behavior. We then consider a more practical
implementation, in which the associated logic executes
independently from the cache's critical path. Here, the
cache's performance is unfettered by recycling, but the
operations necessary to determine dead storage and
recycle such storage execute as time is available.
Finally, we present the design and analysis of a
hardware implementation that scales well with cache
size without sacrificing too much performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Cutler:2015:RPT,
author = "Cody Cutler and Robert Morris",
title = "Reducing pause times with clustered collection",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "131--142",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754184",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Each full garbage collection in a program with
millions of objects can pause the program for multiple
seconds. Much of this work is typically repeated, as
the collector re-traces parts of the object graph that
have not changed since the last collection. Clustered
Collection reduces full collection pause times by
eliminating much of this repeated work. Clustered
Collection identifies clusters: regions of the object
graph that are reachable from a single ``head'' object,
so that reachability of the head implies reachability
of the whole cluster. As long as it is not written, a
cluster need not be re-traced by successive full
collections. The main design challenge is coping with
program writes to clusters while ensuring safe,
complete, and fast collections. In some cases program
writes require clusters to be dissolved, but in most
cases Clustered Collection can handle writes without
having to re-trace the affected cluster. Clustered
Collection chooses clusters likely to suffer few writes
and to yield high savings from re-trace avoidance.
Clustered Collection is implemented as modifications to
the Racket collector. Measurements of the code and data
from the Hacker News web site (which suffers from
significant garbage collection pauses) and a
Twitter-like application show that Clustered Collection
decreases full collection pause times by factors of
three and six, respectively. This improvement is
possible because both applications have gigabytes of
live data, modify only a small fraction of it, and
usually write in ways that do not result in cluster
dissolution. Identifying clusters takes more time than
a full collection, but happens much less frequently
than full collection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Cameron:2015:JFE,
author = "Callum Cameron and Jeremy Singer and David Vengerov",
title = "The judgment of {FORSETI}: economic utility for
dynamic heap sizing of multiple runtimes",
journal = j-SIGPLAN,
volume = "50",
number = "11",
pages = "143--156",
month = nov,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887746.2754180",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "We introduce the FORSETI system, which is a principled
approach for holistic memory management. It permits a
sysadmin to specify the total physical memory resource
that may be shared between all concurrent virtual
machines on a physical node. FORSETI models the heap
size versus application throughput for each virtual
machine, and seeks to maximize the combined throughput
of the set of VMs based on concepts from economic
utility theory. We evaluate the FORSETI system using a
standard Java managed runtime, i.e. OpenJDK. Our
results demonstrate that FORSETI enables dramatic
reductions (up to 5x) in heap footprint without
compromising application execution times.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '15 conference proceedings.",
}
@Article{Diatchki:2015:IHT,
author = "Iavor S. Diatchki",
title = "Improving {Haskell} types with {SMT}",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "1--10",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804307",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a technique for integrating GHC's
type-checker with an SMT solver. The technique was
developed to add support for reasoning about type-level
functions on natural numbers, and so our implementation
uses the theory of linear arithmetic. However, the
approach is not limited to this theory, and makes it
possible to experiment with other external decision
procedures, such as reasoning about type-level
booleans, bit-vectors, or any other theory supported by
SMT solvers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
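
A minimal sketch of what such solver support buys (my example, not code
from the paper; the pragma names the independently developed
ghc-typelits-natnormalise plugin, standing in here for the paper's
SMT-backed solver):

    {-# OPTIONS_GHC -fplugin GHC.TypeLits.Normalise #-}
    {-# LANGUAGE DataKinds, GADTs, KindSignatures, TypeOperators #-}

    import GHC.TypeLits (Nat, type (+))

    -- Length-indexed vectors: the index is a type-level natural.
    data Vec (n :: Nat) a where
      Nil  :: Vec 0 a
      Cons :: a -> Vec n a -> Vec (1 + n) a

    -- Appending needs the equalities 0 + m ~ m and
    -- (1 + n) + m ~ 1 + (n + m).  Both are beyond stock GHC's
    -- type-nat solver but are trivial linear arithmetic for an
    -- external decision procedure.
    append :: Vec n a -> Vec m a -> Vec (n + m) a
    append Nil         ys = ys
    append (Cons x xs) ys = Cons x (append xs ys)
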
@Article{Gundry:2015:TPU,
author = "Adam Gundry",
title = "A typechecker plugin for units of measure:
domain-specific constraint solving in {GHC Haskell}",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "11--22",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804305",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Typed functional programming and units of measure are
a natural combination, as F\# ably demonstrates.
However, encoding statically-checked units in Haskell's
type system leads to inevitable disappointment with the
usability of the resulting system. Extending the
language itself would produce a much better result, but
it would be a lot of work! In this paper, I demonstrate
how typechecker plugins in the Glasgow Haskell Compiler
allow users to define domain-specific constraint
solving behaviour, making it possible to implement
units of measure as a type system extension without
rebuilding the compiler. This paves the way for a more
modular treatment of constraint solving in GHC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
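
A toy illustration (mine, not the paper's uom-plugin interface) of why
units outgrow vanilla type checking: phantom units make addition
unit-safe in stock GHC, but multiplication needs equations over an
abelian group of units, exactly the domain-specific solving a
typechecker plugin can supply:

    {-# LANGUAGE DataKinds, KindSignatures #-}

    import GHC.TypeLits (Symbol)

    -- A quantity tagged with a phantom unit.
    newtype Quantity (u :: Symbol) = Q Double
      deriving Show

    -- Addition is only sensible at one unit; GHC checks this as-is.
    (+.) :: Quantity u -> Quantity u -> Quantity u
    Q x +. Q y = Q (x + y)

    metres :: Double -> Quantity "m"
    metres = Q

    seconds :: Double -> Quantity "s"
    seconds = Q

    ok :: Quantity "m"
    ok = metres 3 +. metres 4

    -- Rejected at compile time:  metres 3 +. seconds 4
    -- Multiplication would need a type-level function on units whose
    -- laws (commutativity, identity, inverses) stock GHC cannot solve.
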
@Article{Farmer:2015:RHT,
author = "Andrew Farmer and Neil Sculthorpe and Andy Gill",
title = "Reasoning with the {HERMIT}: tool support for
equational reasoning on {GHC} core programs",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "23--34",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804303",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A benefit of pure functional programming is that it
encourages equational reasoning. However, the Haskell
language has lacked direct tool support for such
reasoning. Consequently, reasoning about Haskell
programs is either performed manually, or in another
language that does provide tool support (e.g. Agda or
Coq). HERMIT is a Haskell-specific toolkit designed to
support equational reasoning and user-guided program
transformation, and to do so as part of the GHC
compilation pipeline. This paper describes HERMIT's
recently developed support for equational reasoning,
and presents two case studies of HERMIT usage: checking
that type-class laws hold for specific instance
declarations, and mechanising textbook equational
reasoning.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
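
For flavour, the sort of type-class-law obligation the first case study
mechanises, written out by hand here in ordinary Haskell plus a comment
proof (this is not HERMIT's scripting syntax):

    data Tree a = Leaf a | Node (Tree a) (Tree a)

    instance Functor Tree where
      fmap f (Leaf x)   = Leaf (f x)
      fmap f (Node l r) = Node (fmap f l) (fmap f r)

    -- Functor law to check for this instance: fmap id = id.
    --   fmap id (Leaf x)   = Leaf (id x)
    --                      = Leaf x
    --   fmap id (Node l r) = Node (fmap id l) (fmap id r)
    --                      = Node l r           -- induction hypothesis
    -- HERMIT's contribution is letting such steps be replayed against
    -- the GHC Core actually produced for the instance.
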
@Article{Breitner:2015:FPC,
author = "Joachim Breitner",
title = "Formally proving a compiler transformation safe",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "35--46",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804312",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We prove that the Call Arity analysis and
transformation, as implemented in the Haskell compiler
GHC, is safe, i.e. does not impede the performance of
the program. We formalized syntax, semantics, the
analysis and the transformation in the interactive
theorem prover Isabelle to obtain a machine-checked
proof and hence a level of rigor rarely obtained for
compiler optimization safety theorems. The proof is
modular and introduces trace trees as a suitable
abstraction in abstract cardinality analyses. We
discuss the breadth of the formalization gap.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
@Article{Perez:2015:BGG,
author = "Ivan Perez and Henrik Nilsson",
title = "Bridging the {GUI} gap with reactive values and
relations",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "47--58",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804316",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There are at present two ways to write GUIs for
functional code. One is to use standard GUI toolkits,
with all the benefits they bring in terms of feature
completeness, choice of platform, conformance to
platform-specific look-and-feel, long-term viability,
etc. However, such GUI APIs mandate an imperative
programming style for the GUI and related parts of the
application. Alternatively, we can use a functional GUI
toolkit. The GUI can then be written in a functional
style, but at the cost of foregoing many advantages of
standard toolkits that often will be of critical
importance. This paper introduces a light-weight
framework structured around the notions of reactive
values and reactive relations. It allows standard
toolkits to be used from functional code written in a
functional style. We thus bridge the gap between the
two worlds, bringing the advantages of both to the
developer. Our framework is available on Hackage and
has been validated through the development of
non-trivial applications in a commercial context, and
with different standard GUI toolkits.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
@Article{Gill:2015:RMD,
author = "Andy Gill and Neil Sculthorpe and Justin Dawson and
Aleksander Eskilson and Andrew Farmer and Mark Grebe
and Jeffrey Rosenbluth and Ryan Scott and James
Stanton",
title = "The remote monad design pattern",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "59--70",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804311",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Remote Procedure Calls are expensive. This paper
demonstrates how to reduce the cost of calling remote
procedures from Haskell by using the remote monad
design pattern, which amortizes the cost of remote
calls. This gives the Haskell community access to
remote capabilities that are not directly supported, at
surprisingly low cost. We explore the remote
monad design pattern through six models of remote
execution patterns, using a simulated Internet of
Things toaster as a running example. We consider the
expressiveness and optimizations enabled by each remote
execution model, and assess the feasibility of our
approach. We then present a full-scale case study: a
Haskell library that provides a Foreign Function
Interface to the JavaScript Canvas API. Finally, we
discuss existing instances of the remote monad design
pattern found in Haskell libraries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
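
A minimal sketch of the batching idea (names and the printed "transport"
are mine, not the paper's library): commands accumulate in a packet and
cross the network once, when sent.

    data Command = Say String
      deriving Show

    -- A computation that queues commands instead of issuing one round
    -- trip per call.
    newtype Remote a =
      Remote { runRemote :: [Command] -> IO ([Command], a) }

    instance Functor Remote where
      fmap f (Remote m) = Remote $ \cs -> do
        (cs', x) <- m cs
        pure (cs', f x)

    instance Applicative Remote where
      pure x = Remote $ \cs -> pure (cs, x)
      Remote mf <*> Remote mx = Remote $ \cs -> do
        (cs1, f) <- mf cs
        (cs2, x) <- mx cs1
        pure (cs2, f x)

    instance Monad Remote where
      Remote m >>= k = Remote $ \cs -> do
        (cs1, x) <- m cs
        runRemote (k x) cs1

    say :: String -> Remote ()
    say s = Remote $ \cs -> pure (cs ++ [Say s], ())

    -- One (simulated) round trip for the whole accumulated packet.
    send :: Remote a -> IO a
    send (Remote m) = do
      (packet, x) <- m []
      putStrLn ("transmitting: " ++ show packet)
      pure x

    -- send (say "hello" >> say "world") transmits both commands at once.
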
@Article{Morris:2015:VV,
author = "J. Garrett Morris",
title = "Variations on variants",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "71--81",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804320",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Extensible variants improve the modularity and
expressiveness of programming languages: they allow
program functionality to be decomposed into independent
blocks, and allow seamless extension of existing code
with both new cases of existing data types and new
operations over those data types. This paper considers
three approaches to providing extensible variants in
Haskell. Row typing is a long understood mechanism for
typing extensible records and variants, but its
adoption would require extension of Haskell's core type
system. Alternatively, we might hope to encode
extensible variants in terms of existing mechanisms,
such as type classes. We describe an encoding of
extensible variants using instance chains, a proposed
extension of the class system. Unlike many previous
encodings of extensible variants, ours does not require
the definition of a new type class for each function
that consumes variants. Finally, we translate our
encoding to use closed type families, an existing
feature of GHC. Doing so demonstrates the
interpretation of instance chains and functional
dependencies in closed type families. One concern with
encodings like ours is how completely they match the
encoded system. We compare the expressiveness of our
encodings with each other and with systems based on row
types. We find that, while equivalent terms are typable
in each system, both encodings require explicit type
annotations to resolve ambiguities in typing not
present in row type systems, and the type family
implementation retains more constraints in principal
types than does the instance chain implementation. We
propose a general mechanism to guide the instantiation
of ambiguous type variables, show that it eliminates
the need for type annotations in our encodings, and
discuss conditions under which it preserves
coherence.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
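
For orientation, a bare-bones open variant over a type-level list, with
injection via overlapping instances; this simplification (mine) stands
in for the paper's instance-chain and closed-type-family encodings:

    {-# LANGUAGE DataKinds, GADTs, TypeOperators, KindSignatures,
                 FlexibleInstances, MultiParamTypeClasses #-}

    import Data.Kind (Type)

    -- A value of Variant ts holds one value of some type in the list.
    data Variant (ts :: [Type]) where
      Here  :: t -> Variant (t ': ts)
      There :: Variant ts -> Variant (t ': ts)

    -- Injection: search for the type in the list.
    class Member t ts where
      inj :: t -> Variant ts

    instance {-# OVERLAPPING #-} Member t (t ': ts) where
      inj = Here

    instance Member t ts => Member t (u ': ts) where
      inj = There . inj

    example :: Variant '[Int, Bool, String]
    example = inj True
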
@Article{Oliveira:2015:MRM,
author = "Bruno C. d. S. Oliveira and Shin-Cheng Mu and Shu-Hung
You",
title = "Modular reifiable matching: a list-of-functors
approach to two-level types",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "82--93",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804315",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "This paper presents Modular Reifiable Matching (MRM):
a new approach to two level types using a fixpoint of
list-of-functors representation. MRM allows the modular
definition of datatypes and functions by pattern
matching, using a style similar to the widely popular
Datatypes {\`a} la Carte (DTC) approach. However, unlike
DTC, MRM uses a fixpoint of list-of-functors approach
to two-level types. This approach has advantages that
help with various aspects of extensibility, modularity
and reuse. Firstly, modular pattern matching
definitions are collected using a list of matches that
is fully reifiable. This allows for extensible pattern
matching definitions to be easily reused/inherited, and
particular matches to be overridden. Such flexibility
is used, among other things, to implement extensible
generic traversals. Secondly, the subtyping relation
between lists of functors is quite simple, does not
require backtracking, and is easy to model in languages
like Haskell. MRM is implemented as a Haskell library,
and its use and applicability are illustrated through
various examples in the paper.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
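
The core representation, reconstructed in miniature from the abstract's
description (definitions mine): a fixpoint taken over a list of
functors, with pattern matches reified as a list of per-functor cases:

    {-# LANGUAGE DataKinds, GADTs, TypeOperators, KindSignatures #-}

    import Data.Kind (Type)

    -- A sum indexed by a list of functors, not nested binary sums.
    data SumL (fs :: [Type -> Type]) (x :: Type) where
      Head :: f x -> SumL (f ': fs) x
      Tail :: SumL fs x -> SumL (f ': fs) x

    -- Two-level types: tie the recursive knot over the whole list.
    newtype Fix (fs :: [Type -> Type]) = In (SumL fs (Fix fs))

    -- A modular pattern match is literally a reifiable list of cases,
    -- one per functor, which callers can extend or override.
    data Matches (fs :: [Type -> Type]) (x :: Type) (r :: Type) where
      None  :: Matches '[] x r
      (:::) :: (f x -> r) -> Matches fs x r -> Matches (f ': fs) x r

    match :: Matches fs x r -> SumL fs x -> r
    match (m ::: _)  (Head fx) = m fx
    match (_ ::: ms) (Tail s)  = match ms s
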
@Article{Kiselyov:2015:FMM,
author = "Oleg Kiselyov and Hiromi Ishii",
title = "Freer monads, more extensible effects",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "94--105",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804319",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a rational reconstruction of extensible
effects, the recently proposed alternative to monad
transformers, as the confluence of efforts to make
effectful computations compose. Free monads and then
extensible effects emerge from the straightforward term
representation of an effectful computation, as more and
more boilerplate is abstracted away. The generalization
process further leads to freer monads, constructed
without the Functor constraint. The continuation
exposed in freer monads can then be represented as an
efficient type-aligned data structure. The end result
is the algorithmically efficient extensible effects
library, which is not only more comprehensible but also
faster than earlier implementations. As an illustration
of the new library, we show three surprisingly simple
applications: non-determinism with committed choice
(LogicT), catching IO exceptions in the presence of
other effects, and the semi-automatic management of
file handles and other resources through monadic
regions. We extensively use and promote the new sort of
`laziness', which underlies the left Kan extension:
instead of performing an operation, keep its operands
and pretend it is done.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
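
The pivotal datatype is small enough to quote in essence (reconstructed
here; the paper's development names it FFree): a free monad with no
Functor constraint, because the continuation is reified explicitly
rather than mapped under f:

    {-# LANGUAGE GADTs #-}

    import Control.Monad ((>=>))

    data Freer f a where
      Pure   :: a -> Freer f a
      Impure :: f x -> (x -> Freer f a) -> Freer f a

    instance Functor (Freer f) where
      fmap g (Pure x)      = Pure (g x)
      fmap g (Impure fx k) = Impure fx (fmap g . k)

    instance Applicative (Freer f) where
      pure = Pure
      Pure g      <*> x = fmap g x
      Impure fx k <*> x = Impure fx ((<*> x) . k)

    instance Monad (Freer f) where
      Pure x      >>= k = k x
      Impure fx g >>= k = Impure fx (g >=> k)

    -- Any effect signature embeds, with no Functor instance required.
    send :: f a -> Freer f a
    send fx = Impure fx Pure

Replacing the function space in Impure by a type-aligned sequence of
continuations is what gives the paper's library its algorithmic
efficiency.
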
@Article{Foner:2015:FPG,
author = "Kenneth Foner",
title = "Functional pearl: getting a quick fix on comonads",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "106--117",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804310",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A piece of functional programming folklore due to
Piponi provides L{\"o}b's theorem from modal
provability logic with a computational interpretation
as an unusual fixed point. Interpreting modal necessity
as an arbitrary Functor in Haskell, the ``type'' of
L{\"o}b's theorem is inhabited by a fixed point
function allowing each part of a structure to refer to
the whole. However, Functor's logical interpretation
may be used to prove L{\"o}b's theorem only by relying
on its implicit functorial strength, an axiom not
available in the provability modality. As a result, the
well known Loeb fixed point ``cheats'' by using
functorial strength to implement its recursion. Rather
than Functor, a closer Curry analogue to modal logic's
Howard inspiration is a closed (semi-)comonad, of which
Haskell's ComonadApply typeclass provides analogous
structure. Its computational interpretation permits the
definition of a novel fixed point function allowing
each part of a structure to refer to its own context
within the whole. This construction further guarantees
maximal sharing and asymptotic efficiency superior to
Loeb for locally contextual computations upon a large
class of structures. With the addition of a
distributive law, closed comonads may be composed into
spaces of arbitrary dimensionality while preserving the
performance guarantees of this new fixed point. From
these elements, we construct a small embedded
domain-specific language to elegantly express and
evaluate multidimensional ``spreadsheet-like''
recurrences for a variety of cellular automata.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
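
The folklore starting point is compact enough to quote: Piponi's fixed
point lets every element of a structure refer to the whole. The pearl's
contribution (not shown here) swaps Functor for ComonadApply so that
each element instead refers to its own context, with sharing and
efficiency guarantees:

    -- Piponi's "Loeb" fixed point over any Functor.
    loeb :: Functor f => f (f a -> a) -> f a
    loeb fs = xs
      where xs = fmap ($ xs) fs

    -- A one-dimensional "spreadsheet": cells refer to the whole list.
    demo :: [Int]
    demo = loeb [const 1, (!! 0), \xs -> xs !! 0 + xs !! 1]
    -- demo == [1, 1, 2]
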
@Article{Stolarek:2015:ITF,
author = "Jan Stolarek and Simon Peyton Jones and Richard A.
Eisenberg",
title = "Injective type families for {Haskell}",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "118--128",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804314",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Haskell, as implemented by the Glasgow Haskell
Compiler (GHC), allows expressive type-level
programming. The most popular type-level programming
extension is TypeFamilies, which allows users to write
functions on types. Yet, using type functions can
cripple type inference in certain situations. In
particular, lack of injectivity in type functions means
that GHC can never infer an instantiation of a type
variable appearing only under type functions. In this
paper, we describe a small modification to GHC that
allows type functions to be annotated as injective. GHC
naturally must check validity of the injectivity
annotations. The algorithm to do so is surprisingly
subtle. We prove soundness for a simplification of our
algorithm, and state and prove a completeness property,
though the algorithm is not fully complete. As much of
our reasoning surrounds functions defined by a simple
pattern-matching structure, we believe our results
extend beyond just Haskell. We have implemented our
solution on a branch of GHC and plan to make it
available to regular users with the next stable release
of the compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
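
The proposed annotation shipped in GHC 8.0 as the TypeFamilyDependencies
extension; a small sketch (example mine) of the inference it enables:

    {-# LANGUAGE TypeFamilies, TypeFamilyDependencies #-}

    -- The "| r -> a" clause declares F injective: equal results imply
    -- equal arguments, so GHC may run F backwards during inference.
    type family F a = r | r -> a where
      F Int  = Bool
      F Bool = Int

    -- Here `a` appears only under F, which the abstract notes would
    -- otherwise make it uninferable at call sites.
    g :: F a -> Int
    g _ = 0

    -- Injectivity lets GHC improve the wanted F a ~ Bool to a ~ Int.
    demo :: Int
    demo = g True
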
@Article{Serrano:2015:TFC,
author = "Alejandro Serrano and Jurriaan Hage and Patrick Bahr",
title = "Type families with class, type classes with family",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "129--140",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804304",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Type classes and type families are key ingredients in
Haskell programming. Type classes were introduced to
deal with ad-hoc polymorphism, although with the
introduction of functional dependencies, their use
expanded to type-level programming. Type families also
allow encoding type-level functions, but more directly
in the form of rewrite rules. In this paper we show
that type families are powerful enough to simulate type
classes (without overlapping instances), and we provide
a formal proof of the soundness and completeness of
this simulation. Encoding instance constraints as type
families eases the path to proposed extensions to type
classes, like closed sets of instances, instance
chains, and control over the search procedure. The only
feature which type families cannot simulate is
elaboration, that is, generating code from the
derivation of a rewriting. We look at ways to solve
this problem in current Haskell, and propose an
extension to allow elaboration during the rewriting
phase.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
@Article{Walker:2015:DFC,
author = "Michael Walker and Colin Runciman",
title = "{D{\'e}j{\`a} Fu}: a concurrency testing library for
{Haskell}",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "141--152",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804306",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Systematic concurrency testing (SCT) is an approach to
testing potentially nondeterministic concurrent
programs. SCT avoids potentially unrepeatable results
that may arise from unit testing concurrent programs.
It seems to have received little attention from Haskell
programmers. This paper introduces a generalisation of
Haskell's concurrency abstraction in the form of
typeclasses, and a library for testing concurrent
programs. A number of examples are provided, some of
which come from pre-existing packages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
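
The shape of that typeclass generalisation, reduced to a sketch (the
class name and selection of methods are mine; the library's real class
is richer, and its testing instance, which explores many schedules
systematically, is omitted):

    {-# LANGUAGE TypeFamilies #-}

    import Control.Concurrent (forkIO)
    import Control.Concurrent.MVar
      (MVar, newEmptyMVar, putMVar, takeMVar)
    import Data.Kind (Type)

    -- Programs written against this class can run for real in IO or
    -- under a simulated scheduler for deterministic testing.
    class Monad m => MonadConcSketch m where
      type Var m :: Type -> Type
      fork    :: m () -> m ()
      newVar  :: m (Var m a)
      putVar  :: Var m a -> a -> m ()
      takeVar :: Var m a -> m a

    instance MonadConcSketch IO where
      type Var IO = MVar
      fork m  = () <$ forkIO m
      newVar  = newEmptyMVar
      putVar  = putMVar
      takeVar = takeMVar
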
@Article{Trilla:2015:IIP,
author = "Jos{\'e} Manuel Calder{\'o}n Trilla and Colin
Runciman",
title = "Improving implicit parallelism",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "153--164",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804308",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Using static analysis techniques, compilers for lazy
functional languages can identify parts of a program
that can legitimately be evaluated in parallel
and ensure that those expressions are executed
concurrently with the main thread of execution. These
techniques can produce improvements in the runtime
performance of a program, but are limited by the static
analyses' poor prediction of runtime performance. This
paper outlines the development of a system that uses
iterative profile-directed improvement in addition to
well-studied static analysis techniques. This allows us
to achieve higher performance gains than through static
analysis alone.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
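
The sparks such a system places are ordinary par/pseq annotations; a
hand-written example of what the search targets (the paper's compiler
inserts and then tunes such annotations automatically, guided by
profiles):

    import Control.Parallel (par, pseq)  -- package: parallel

    -- Evaluate the left recursive call in parallel with the right.
    nfib :: Int -> Integer
    nfib n
      | n < 2     = 1
      | otherwise = l `par` (r `pseq` l + r + 1)
      where
        l = nfib (n - 1)
        r = nfib (n - 2)
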
@Article{Scibior:2015:PPP,
author = "Adam {\'S}cibior and Zoubin Ghahramani and Andrew D.
Gordon",
title = "Practical probabilistic programming with monads",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "165--176",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804317",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The machine learning community has recently shown a
lot of interest in practical probabilistic programming
systems that target the problem of Bayesian inference.
Such systems come in different forms, but they all
express probabilistic models as computational processes
using syntax resembling programming languages. In the
functional programming community monads are known to
offer a convenient and elegant abstraction for
programming with probability distributions, but their
use is often limited to very simple inference problems.
We show that it is possible to use the monad
abstraction to construct probabilistic models for
machine learning, while still offering good performance
of inference in challenging models. We use a GADT as an
underlying representation of a probability distribution
and apply Sequential Monte Carlo-based methods to
achieve efficient inference. We define a formal
semantics via measure theory. We demonstrate a clean
and elegant implementation that achieves performance
comparable with Anglican, a state-of-the-art
probabilistic programming system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
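
A toy version of the monad-as-GADT idea (mine, not the paper's code;
exhaustive enumeration stands in for the paper's Sequential Monte Carlo
inference, and only finite-support distributions are handled):

    {-# LANGUAGE GADTs #-}

    -- A probability distribution reified as a syntax tree.
    data Dist a where
      Return    :: a -> Dist a
      Bind      :: Dist b -> (b -> Dist a) -> Dist a
      Primitive :: [(a, Double)] -> Dist a   -- finite support only

    instance Functor Dist where
      fmap f d = Bind d (Return . f)

    instance Applicative Dist where
      pure      = Return
      df <*> dx = Bind df (\f -> fmap f dx)

    instance Monad Dist where
      (>>=) = Bind

    coin :: Double -> Dist Bool
    coin p = Primitive [(True, p), (False, 1 - p)]

    -- A stand-in interpreter: enumerate all weighted outcomes.
    enumerate :: Dist a -> [(a, Double)]
    enumerate (Return x)     = [(x, 1)]
    enumerate (Primitive xs) = xs
    enumerate (Bind d k)     =
      [ (y, p * q) | (x, p) <- enumerate d, (y, q) <- enumerate (k x) ]

    -- enumerate (coin 0.5 >>= \b -> if b then coin 0.3 else pure False)
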
@Article{Polakow:2015:EFL,
author = "Jeff Polakow",
title = "Embedding a full linear lambda calculus in {Haskell}",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "177--188",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804309",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an encoding of full linear lambda calculus
in Haskell using higher order abstract syntax. By
making use of promoted data kinds, multi-parameter type
classes and functional dependencies, the encoding
allows Haskell to do both linear type checking and
linear type inference.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
@Article{Elliott:2015:GFI,
author = "Trevor Elliott and Lee Pike and Simon Winwood and Pat
Hickey and James Bielman and Jamey Sharp and Eric
Seidel and John Launchbury",
title = "Guilt free {Ivory}",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "189--200",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804318",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Ivory is a language that enforces memory safety and
avoids most undefined behaviors while providing
low-level control of memory-manipulation. Ivory is
embedded in a modern variant of Haskell, as implemented
by the GHC compiler. The main contributions of the
paper are two-fold. First, we demonstrate how to embed
the type-system of a safe-C language into the type
extensions of GHC. Second, Ivory is of interest in its
own right, as a powerful language for writing
high-assurance embedded programs. Beyond invariants
enforced by its type-system, Ivory has direct support
for model-checking, theorem-proving, and property-based
testing. Ivory's semantics have been formalized and
proved to guarantee memory safety.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
@Article{McDonell:2015:TSR,
author = "Trevor L. McDonell and Manuel M. T. Chakravarty and
Vinod Grover and Ryan R. Newton",
title = "Type-safe runtime code generation: accelerate to
{LLVM}",
journal = j-SIGPLAN,
volume = "50",
number = "12",
pages = "201--212",
month = dec,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2887747.2804313",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:44 MST 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Embedded languages are often compiled at application
runtime; thus, embedded compile-time errors become
application runtime errors. We argue that advanced type
system features, such as GADTs and type families, play
a crucial role in minimising such runtime errors.
Specifically, a rigorous type discipline reduces
runtime errors due to bugs in both embedded language
applications and the implementation of the embedded
language compiler itself. In this paper, we focus on
the safety guarantees achieved by type preserving
compilation. We discuss the compilation pipeline of
Accelerate, a high-performance array language targeting
both multicore CPUs and GPUs, where we are able to
preserve types from the source language down to a
low-level register language in SSA form. Specifically,
we demonstrate the practicability of our approach by
creating a new type-safe interface to the
industrial-strength LLVM compiler infrastructure, which
we used to build two new Accelerate backends that show
competitive runtimes on a set of benchmarks across both
CPUs and GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '15 conference proceedings.",
}
@Article{McKinley:2016:PWU,
author = "Kathryn S. McKinley",
title = "Programming the world of uncertain things (keynote)",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "1--2",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2843895",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computing has entered the era of uncertain data, in
which hardware and software generate and reason about
estimates. Applications use estimates from sensors,
machine learning, big data, humans, and approximate
hardware and software. Unfortunately, developers face
pervasive correctness, programmability, and
optimization problems due to estimates. Most
programming languages make these problems
worse. We propose a new programming abstraction called
{Uncertain$<$T$>$} embedded into languages, such as
C\#, C++, Java, Python, and JavaScript. Applications
that consume estimates use familiar discrete operations
for their estimates; overloaded conditional operators
specify hypothesis tests and applications use them to
control false positives and negatives; and new
compositional operators express domain knowledge. By
carefully restricting the expressiveness, the runtime
automatically implements correct statistical reasoning
at conditionals, relieving developers of the need to
implement or deeply understand statistics. We
demonstrate substantial programmability, correctness,
and efficiency benefits of this programming model for
GPS sensor navigation, approximate computing, machine
learning, and xBox.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Murray:2016:SRC,
author = "Richard M. Murray",
title = "Synthesis of reactive controllers for hybrid systems
(keynote)",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "3--3",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2843894",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Decision-making logic in hybrid systems is responsible
for selecting modes of operation for the underlying
(continuous) control system, reacting to external
events and failures in the system, and ensuring that
the overall control system is satisfying safety and
performance specifications. Tools from computer
science, such as model-checking and logic synthesis,
combined with design patterns from feedback control
theory provide new approaches to solving these
problems. A major shift is the move from ``design then
verify'' to ``specify then synthesize'' approaches to
controller design that allow simultaneous synthesis of
high-performance, robust control laws and
correct-by-construction decision-making logic.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Walker:2016:CPL,
author = "David Walker",
title = "Confluences in programming languages research
(keynote)",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "4--4",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2843896",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A confluence occurs when two rivers flow together;
downstream the combined forces gather strength and
propel their waters forward with increased vigor. In
academic research, according to Varghese, a confluence
occurs after some trigger, perhaps a discovery or a
change in technology, and brings two previously
separate branches of research together. In this talk, I
will discuss confluences in programming languages
research. Here, confluences often occur when basic
research finds application in some important new
domain. Two prime examples from my own career involve
the confluence of research in type theory and systems
security, triggered by new theoretical tools for
reasoning about programming language safety, and the
confluence of formal methods and networking, triggered
by the rise of data centers. These experiences may shed
light on what to teach our students and what is next
for programming languages research.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Brown:2016:BTN,
author = "Matt Brown and Jens Palsberg",
title = "Breaking through the normalization barrier: a
self-interpreter for {F$_{\rm omega}$}",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "5--17",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837623",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "According to conventional wisdom, a self-interpreter
for a strongly normalizing lambda-calculus is
impossible. We call this the normalization barrier. The
normalization barrier stems from a theorem in
computability theory that says that a total universal
function for the total computable functions is
impossible. In this paper we break through the
normalization barrier and define a self-interpreter for
System F_omega, a strongly normalizing lambda-calculus.
After a careful analysis of the classical theorem, we
show that static type checking in F_omega can exclude
the proof's diagonalization gadget, leaving open the
possibility for a self-interpreter. Along with the
self-interpreter, we program four other operations in
F_omega, including a continuation-passing style
transformation. Our operations rely on a new approach
to program representation that may be useful in theorem
provers and compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Altenkirch:2016:TTT,
author = "Thorsten Altenkirch and Ambrus Kaposi",
title = "Type theory in type theory using quotient inductive
types",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "18--29",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837638",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an internal formalisation of a type heory
with dependent types in Type Theory using a special
case of higher inductive types from Homotopy Type
Theory which we call quotient inductive types (QITs).
Our formalisation of type theory avoids referring to
preterms or a typability relation, instead defining
well-typed objects directly by an inductive definition. We use
the elimination principle to define the set-theoretic
and logical predicate interpretation. The work has been
formalized in the Agda system, extended with QITs via
postulates.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Cai:2016:SFE,
author = "Yufei Cai and Paolo G. Giarrusso and Klaus Ostermann",
title = "System {F$_{\rm omega}$} with equirecursive types for
datatype-generic programming",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "30--43",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837660",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Traversing an algebraic datatype by hand requires
boilerplate code which duplicates the structure of the
datatype. Datatype-generic programming (DGP) aims to
eliminate such boilerplate code by decomposing
algebraic datatypes into type constructor applications
from which generic traversals can be synthesized.
However, different traversals require different
decompositions, which yield isomorphic but unequal
types. This hinders the interoperability of different
DGP techniques. In this paper, we propose F \omega \mu
, an extension of the higher-order polymorphic lambda
calculus F \omega with records, variants, and
equirecursive types. We prove the soundness of the type
system, and show that type checking for first-order
recursive types is decidable with a practical type
checking algorithm. In our soundness proof we define
type equality by interpreting types as infinitary
\lambda -terms (in particular, Berarducci-trees). To
decide type equality we \beta -normalize types, and
then use an extension of equivalence checking for usual
equirecursive types. Thanks to equirecursive types, new
decompositions for a datatype can be added modularly
and still interoperate with each other, allowing
multiple DGP techniques to work together. We sketch how
generic traversals can be synthesized, and apply these
components to some examples. Since the set of datatype
decompositions becomes extensible, System F \omega \mu
enables using DGP techniques incrementally, instead of
planning for them upfront or doing invasive
refactoring.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Curien:2016:TER,
author = "Pierre-Louis Curien and Marcelo Fiore and Guillaume
Munch-Maccagnoni",
title = "A theory of effects and resources: adjunction models
and polarised calculi",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "44--56",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837652",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We consider the Curry--Howard-Lambek correspondence
for effectful computation and resource management,
specifically proposing polarised calculi together with
presheaf-enriched adjunction models as the starting
point for a comprehensive semantic theory relating
logical systems, typed calculi, and categorical models
in this context. Our thesis is that the combination of
effects and resources should be considered
orthogonally. Model theoretically, this leads to an
understanding of our categorical models from two
complementary perspectives: (i) as a linearisation of
CBPV (Call-by-Push-Value) adjunction models, and (ii)
as an extension of linear/non-linear adjunction models
with an adjoint resolution of computational effects.
When the linear structure is cartesian and the resource
structure is trivial we recover Levy's notion of CBPV
adjunction model, while when the effect structure is
trivial we have Benton's linear/non-linear adjunction
models. Further instances of our model theory include
the dialogue categories with a resource modality of
Melli{\`e}s and Tabareau, and the [E]EC ([Enriched]
Effect Calculus) models of Egger, M{\o}gelberg and
Simpson. Our development substantiates the approach by
providing a lifting theorem of linear models into
cartesian ones. To each of our categorical models we
systematically associate a typed term calculus, each of
which corresponds to a variant of the sequent calculi
LJ (Intuitionistic Logic) or ILL (Intuitionistic Linear
Logic). The adjoint resolution of effects corresponds
to polarisation whereby, syntactically, types locally
determine a strict or lazy evaluation order and,
semantically, the associativity of cuts is relaxed. In
particular, our results show that polarisation provides
a computational interpretation of CBPV in direct style.
Further, we characterise depolarised models: those
where the cut is associative, and where the evaluation
order is unimportant. We explain possible advantages of
this style of calculi for the operational semantics of
effects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Murase:2016:TVH,
author = "Akihiro Murase and Tachio Terauchi and Naoki Kobayashi
and Ryosuke Sato and Hiroshi Unno",
title = "Temporal verification of higher-order functional
programs",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "57--68",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837667",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an automated approach to verifying
arbitrary omega-regular properties of higher-order
functional programs. Previous automated methods
proposed for this class of programs could only handle
safety properties or termination, and our approach is
the first to be able to verify arbitrary omega-regular
liveness properties. Our approach is
automata-theoretic, and extends our recent
binary-reachability-based approach to automated
termination verification of higher-order functional
programs (published in ESOP 2014) to fair termination. In
that work, we have shown that checking disjunctive
well-foundedness of (the transitive closure of) the
``calling relation'' is sound and complete for
termination. The extension to fair termination is
tricky, however, because the straightforward extension
that checks disjunctive well-foundedness of the fair
calling relation turns out to be unsound, as we shall
show in the paper. Roughly, our solution is to check
fairness on the transition relation instead of the
calling relation, and propagate the information to
determine when it is necessary and sufficient to check
for disjunctive well-foundedness on the calling
relation. We prove that our approach is sound and
complete. We have implemented a prototype of our
approach, and confirmed that it is able to
automatically verify liveness properties of some
non-trivial higher-order programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Plotkin:2016:SNV,
author = "Gordon D. Plotkin and Nikolaj Bj{\o}rner and Nuno P.
Lopes and Andrey Rybalchenko and George Varghese",
title = "Scaling network verification using symmetry and
surgery",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "69--83",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837657",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "On the surface, large data centers with about 100,000
stations and nearly a million routing rules are complex
and hard to verify. However, these networks are highly
regular by design; for example they employ fat tree
topologies with backup routers interconnected by
redundant patterns. To exploit these regularities, we
introduce network transformations: given a reachability
formula and a network, we transform the network into a
simpler to verify network and a corresponding
transformed formula, such that the original formula is
valid in the network if and only if the transformed
formula is valid in the transformed network. Our
network transformations exploit network surgery (in
which irrelevant or redundant sets of nodes, headers,
ports, or rules are ``sliced'' away) and network
symmetry (say between backup routers). The validity of
these transformations is established using a formal
theory of networks. In particular, using Van
Benthem--Hennessy--Milner style bisimulation, we show
that one can generally associate bisimulations to
transformations connecting networks and formulas with
their transforms. Our work is a development in an area
of current wide interest: applying programming language
techniques (in our case bisimulation and modal logic)
to problems in switching networks. We provide
experimental evidence that our network transformations
can speed up by 65x the task of verifying the
communication between all pairs of Virtual Machines in
a large datacenter network with about 100,000 VMs. An
all-pair reachability calculation, which formerly took
5.5 days, can be done in 2 hours, and can be easily
parallelized to complete in minutes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Brotherston:2016:MCS,
author = "James Brotherston and Nikos Gorogiannis and Max
Kanovich and Reuben Rowe",
title = "Model checking for symbolic-heap separation logic with
inductive predicates",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "84--96",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837621",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We investigate the *model checking* problem for
symbolic-heap separation logic with user-defined
inductive predicates, i.e., the problem of checking
that a given stack-heap memory state satisfies a given
formula in this language, as arises e.g. in software
testing or runtime verification. First, we show that
the problem is *decidable*; specifically, we present a
bottom-up fixed point algorithm that decides the
problem and runs in exponential time in the size of the
problem instance. Second, we show that, while model
checking for the full language is EXPTIME-complete, the
problem becomes NP-complete or PTIME-solvable when we
impose natural syntactic restrictions on the schemata
defining the inductive predicates. We additionally
present NP and PTIME algorithms for these restricted
fragments. Finally, we report on the experimental
performance of our procedures on a variety of
specifications extracted from programs, exercising
multiple combinations of syntactic restrictions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Koskinen:2016:RCR,
author = "Eric Koskinen and Junfeng Yang",
title = "Reducing crash recoverability to reachability",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "97--108",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837648",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software applications run on a variety of platforms
(filesystems, virtual slices, mobile hardware, etc.)
that do not provide 100\% uptime. As such, these
applications may crash at any unfortunate moment losing
volatile data and, when re-launched, they must be able
to correctly recover from potentially inconsistent
states left on persistent storage. From a verification
perspective, crash recovery bugs can be particularly
frustrating because, even when it has been formally
proved for a program that it satisfies a property, the
proof is foiled by these external events that crash and
restart the program. In this paper we first provide a
hierarchical formal model of what it means for a
program to be crash recoverable. Our model captures the
recoverability of many real world programs, including
those in our evaluation which use sophisticated
recovery algorithms such as shadow paging and
write-ahead logging. Next, we introduce a novel
technique capable of automatically proving that a
program correctly recovers from a crash via a reduction
to reachability. Our technique takes an input
control-flow automaton and transforms it into an
encoding that blends the capture of snapshots of
pre-crash states into a symbolic search for a proof
that recovery terminates and every recovered execution
simulates some crash-free execution. Our encoding is
designed to enable one to apply existing abstraction
techniques in order to do the work that is necessary to
prove recoverability. We have implemented our technique
in a tool called Eleven82, capable of analyzing C
programs to detect recoverability bugs or prove their
absence. We have applied our tool to benchmark examples
drawn from industrial file systems and databases,
including GDBM, LevelDB, LMDB, PostgreSQL, SQLite,
VMware and ZooKeeper. Within minutes, our tool is able
to discover bugs or prove that these fragments are
crash recoverable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Zhang:2016:QGM,
author = "Xin Zhang and Ravi Mangal and Aditya V. Nori and Mayur
Naik",
title = "Query-guided maximum satisfiability",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "109--122",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837658",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a new optimization problem ``Q-MaxSAT'', an
extension of the well-known Maximum Satisfiability or
MaxSAT problem. In contrast to MaxSAT, which aims to
find an assignment to all variables in the formula,
Q-MaxSAT computes an assignment to a desired subset of
variables (or queries) in the formula. Indeed, many
problems in diverse domains such as program reasoning,
information retrieval, and mathematical optimization
can be naturally encoded as Q-MaxSAT instances. We
describe an iterative algorithm for solving Q-MaxSAT.
In each iteration, the algorithm solves a subproblem
that is relevant to the queries, and applies a novel
technique to check whether the partial assignment found
is a solution to the Q-MaxSAT problem. If the check
fails, the algorithm grows the subproblem with a new
set of clauses identified as relevant to the queries.
Our empirical evaluation shows that our Q-MaxSAT solver
Pilot achieves significant improvements in runtime and
memory consumption over conventional MaxSAT solvers on
several Q-MaxSAT instances generated from real-world
problems in program analysis and information
retrieval.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Lin:2016:SSW,
author = "Anthony W. Lin and Pablo Barcel{\'o}",
title = "String solving with word equations and transducers:
towards a logic for analysing mutation {XSS}",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "123--136",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837641",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study the fundamental issue of decidability of
satisfiability over string logics with concatenations
and finite-state transducers as atomic operations.
Although restricting to one type of operations yields
decidability, little is known about the decidability of
their combined theory, which is especially relevant
when analysing security vulnerabilities of dynamic web
pages in a more realistic browser model. On the one
hand, word equations (string logic with concatenations)
cannot precisely capture sanitisation functions (e.g.
htmlescape) and implicit browser transductions (e.g.
innerHTML mutations). On the other hand, transducers
suffer from the reverse problem of being able to model
sanitisation functions and browser transductions, but
not string concatenations. Naively combining word
equations and transducers easily leads to an
undecidable logic. Our main contribution is to show
that the ``straight-line fragment'' of the logic is
decidable (complexity ranges from PSPACE to EXPSPACE).
The fragment can express the program logics of
straight-line string-manipulating programs with
concatenations and transductions as atomic operations,
which arise when performing bounded model checking or
dynamic symbolic executions. We demonstrate that the
logic can naturally express constraints required for
analysing mutation XSS in web applications. Finally,
the logic remains decidable in the presence of length,
letter-counting, regular, indexOf, and disequality
constraints.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Cardelli:2016:SCD,
author = "Luca Cardelli and Mirco Tribastone and Max
Tschaikowski and Andrea Vandin",
title = "Symbolic computation of differential equivalences",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "137--150",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837649",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Ordinary differential equations (ODEs) are widespread
in many natural sciences including chemistry, ecology,
and systems biology, and in disciplines such as control
theory and electrical engineering. Building on the
celebrated molecules-as-processes paradigm, they have
become increasingly popular in computer science, with
high-level languages and formal methods such as Petri
nets, process algebra, and rule-based systems that are
interpreted as ODEs. We consider the problem of
comparing and minimizing ODEs automatically. Influenced
by traditional approaches in the theory of programming,
we propose differential equivalence relations. We study
them for a basic intermediate language, for which we
have decidability results, that can be targeted by a
class of high-level specifications. An ODE implicitly
represents an uncountable state space, hence reasoning
techniques cannot be borrowed from established domains
such as probabilistic programs with finite-state Markov
chain semantics. We provide novel symbolic procedures
to check an equivalence and compute the largest one via
partition refinement algorithms that use satisfiability
modulo theories. We illustrate the generality of our
framework by showing that differential equivalences
include (i) well-known notions for the minimization of
continuous-time Markov chains (lumpability),
(ii)~bisimulations for chemical reaction networks
recently proposed by Cardelli et al., and (iii)
behavioral relations for process algebra with ODE
semantics. With a prototype implementation we are able
to detect equivalences in biochemical models from the
literature that cannot be reduced using competing
automatic techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Hague:2016:UDC,
author = "Matthew Hague and Jonathan Kochems and C.-H. Luke
Ong",
title = "Unboundedness and downward closures of higher-order
pushdown automata",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "151--163",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837627",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We show the diagonal problem for higher-order pushdown
automata (HOPDA), and hence the simultaneous
unboundedness problem, is decidable. From recent work
by Zetzsche this means that we can construct the
downward closure of the set of words accepted by a
given HOPDA. This also means we can construct the
downward closure of the Parikh image of a HOPDA. Both
of these consequences play an important role in
verifying concurrent higher-order programs expressed as
HOPDA or safe higher-order recursion schemes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
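For a finite language the downward closure is directly enumerable,
which makes the object of study concrete; the paper's contribution is
constructing it for the (generally infinite) languages of higher-order
pushdown automata. A small illustrative Python sketch, not from the
paper:

from itertools import combinations

def downward_closure(words):
    """All words obtainable by deleting letters from some member of
    a finite language (its subword closure)."""
    closure = set()
    for w in words:
        for k in range(len(w) + 1):
            for idxs in combinations(range(len(w)), k):
                closure.add(''.join(w[i] for i in idxs))
    return closure

print(sorted(downward_closure({'ab', 'ba'})))
# ['', 'a', 'ab', 'b', 'ba']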
@Article{Devriese:2016:FAC,
author = "Dominique Devriese and Marco Patrignani and Frank
Piessens",
title = "Fully-abstract compilation by approximate
back-translation",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "164--177",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837618",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A compiler is fully-abstract if the compilation from
source language programs to target language programs
reflects and preserves behavioural equivalence. Such
compilers have important security benefits, as they
limit the power of an attacker interacting with the
program in the target language to that of an attacker
interacting with the program in the source language.
Proving compiler full-abstraction is, however, rather
complicated. A common proof technique is based on the
back-translation of target-level program contexts to
behaviourally-equivalent source-level contexts.
However, constructing such a back-translation is
problematic when the source language is not strong
enough to embed an encoding of the target language. For
instance, when compiling from the simply-typed $
\lambda $-calculus ($ \lambda \tau $) to the untyped $
\lambda $-calculus ($ \lambda u $), the lack of
recursive types in $ \lambda \tau $ prevents such a
back-translation. We propose a general and elegant
solution for this problem. The key insight is that it
suffices to construct an approximate back-translation.
The approximation is only accurate up to a certain
number of steps and conservative beyond that, in the
sense that the context generated by the
back-translation may diverge when the original would
not, but not vice versa. Based on this insight, we
describe a general technique for proving compiler
full-abstraction and demonstrate it on a compiler from
$ \lambda \tau $ to $ \lambda u$. The proof uses
asymmetric cross-language logical relations and makes
innovative use of step-indexing to express the relation
between a context and its approximate back-translation.
We believe this proof technique can scale to
challenging settings and enable simpler, more scalable
proofs of compiler full-abstraction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Kang:2016:LVS,
author = "Jeehoon Kang and Yoonseung Kim and Chung-Kil Hur and
Derek Dreyer and Viktor Vafeiadis",
title = "Lightweight verification of separate compilation",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "178--190",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837642",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Major compiler verification efforts, such as the
CompCert project, have traditionally simplified the
verification problem by restricting attention to the
correctness of whole-program compilation, leaving open
the question of how to verify the correctness of
separate compilation. Recently, a number of
sophisticated techniques have been proposed for proving
more flexible, compositional notions of compiler
correctness, but these approaches tend to be quite
heavyweight compared to the simple ``closed
simulations'' used in verifying whole-program
compilation. Applying such techniques to a compiler
like CompCert, as Stewart et al. have done, involves
major changes and extensions to its original
verification. In this paper, we show that if we aim
somewhat lower---to prove correctness of separate
compilation, but only for a {\em single} compiler---we can
drastically simplify the proof effort. Toward this end,
we develop several lightweight techniques that recast
the compositional verification problem in terms of
whole-program compilation, thereby enabling us to
largely reuse the closed-simulation proofs from
existing compiler verifications. We demonstrate the
effectiveness of these techniques by applying them to
CompCert 2.4, converting its verification of
whole-program compilation into a verification of
separate compilation in less than two person-months.
This conversion only required a small number of changes
to the original proofs, and uncovered two compiler bugs
along the way. The result is SepCompCert, the first
verification of separate compilation for the full
CompCert compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Robbins:2016:MMS,
author = "Ed Robbins and Andy King and Tom Schrijvers",
title = "From {MinX} to {MinC}: semantics-driven decompilation
of recursive datatypes",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "191--203",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837633",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reconstructing the meaning of a program from its
binary executable is known as reverse engineering; it
has a wide range of applications in software security,
exposing piracy, legacy systems, etc. Since reversing
is ultimately a search for meaning, there is much
interest in inferring a type (a meaning) for the
elements of a binary in a consistent way. Unfortunately
existing approaches do not guarantee any semantic
relevance for their reconstructed types. This paper
presents a new and semantically-founded approach that
provides strong guarantees for the reconstructed types.
Key to our approach is the derivation of a witness
program in a high-level language alongside the
reconstructed types. This witness has the same
semantics as the binary, is type-correct by
construction, and it induces a (justifiable) type
assignment on the binary. Moreover, the approach
effectively yields a type-directed decompiler. We
formalise and implement the approach for reversing
MinX, an abstraction of x86, to MinC, a type-safe
dialect of C with recursive datatypes. Our evaluation
compiles a range of textbook C algorithms to MinX and
then recovers the original structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Lorenzen:2016:STD,
author = "Florian Lorenzen and Sebastian Erdweg",
title = "Sound type-dependent syntactic language extension",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "204--216",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837644",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Syntactic language extensions can introduce new
facilities into a programming language while requiring
little implementation effort and modest changes to the
compiler. It is typical to desugar language extensions
in a distinguished compiler phase after parsing or type
checking, not affecting any of the later compiler
phases. If desugaring happens before type checking, the
desugaring cannot depend on typing information and type
errors are reported in terms of the generated code. If
desugaring happens after type checking, the code
generated by the desugaring is not type checked and may
introduce vulnerabilities. Both options are
undesirable. We propose a system for syntactic
extensibility where desugaring happens after type
checking and desugarings are guaranteed to only
generate well-typed code. A major novelty of our work
is that desugarings operate on typing derivations
instead of plain syntax trees. This provides
desugarings access to typing information and forms the
basis for the soundness guarantee we provide, namely
that a desugaring generates a valid typing derivation.
We have implemented our system for syntactic
extensibility in a language-independent fashion and
instantiated it for a substantial subset of Java,
including generics and inheritance. We provide a sound
Java extension for Scala-like for-comprehensions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Padon:2016:DII,
author = "Oded Padon and Neil Immerman and Sharon Shoham and
Aleksandr Karbyshev and Mooly Sagiv",
title = "Decidability of inferring inductive invariants",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "217--231",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837640",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Induction is a successful approach for verification of
hardware and software systems. A common practice is to
model a system using logical formulas, and then use a
decision procedure to verify that some logical formula
is an inductive safety invariant for the system. A key
ingredient in this approach is coming up with the
inductive invariant, which is known as invariant
inference. This is a major difficulty, and it is often
left for humans or addressed by sound but incomplete
abstract interpretation. This paper is motivated by the
problem of inductive invariants in shape analysis and
in distributed protocols. This paper approaches the
general problem of inferring first-order inductive
invariants by restricting the language L of candidate
invariants. Notice that the problem of invariant
inference in a restricted language L differs from the
safety problem, since a system may be safe and still
not have any inductive invariant in L that proves
safety. Clearly, if L is finite (and if testing an
inductive invariant is decidable), then inferring
invariants in L is decidable. This paper presents some
interesting cases when inferring inductive invariants
in L is decidable even when L is an infinite language
of universal formulas. Decidability is obtained by
restricting L and defining a suitable well-quasi-order
on the state space. We also present some undecidability
results that show that our restrictions are necessary.
We further present a framework for systematically
constructing infinite languages while keeping the
invariant inference problem decidable. We illustrate
our approach by showing the decidability of inferring
invariants for programs manipulating linked-lists, and
for distributed protocols.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Lavaee:2016:HDP,
author = "Rahman Lavaee",
title = "The hardness of data packing",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "232--242",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837669",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A program can benefit from improved cache block
utilization when contemporaneously accessed data
elements are placed in the same memory block. This can
reduce the program's memory block working set and
thereby, reduce the capacity miss rate. We formally
define the problem of data packing for an arbitrary number
of blocks in the cache and packing factor (the number
of data objects fitting in a cache block) and study how
well the optimal solution can be approximated for two
dual problems. On the one hand, we show that the cache
hit maximization problem is approximable within a
constant factor, for every fixed number of blocks in
the cache. On the other hand, we show that unless P=NP,
the cache miss minimization problem cannot be
efficiently approximated.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
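The objective that data packing optimizes can be made concrete with a
small simulator: fix a packing (a map from objects to memory blocks)
and count the misses of a fully associative LRU cache over an access
trace. A hedged Python sketch (names and the toy trace are
illustrative; the paper studies the hardness of choosing the packing,
not the simulation):

from collections import OrderedDict

def miss_count(trace, packing, num_blocks):
    """Misses of a fully associative LRU cache holding `num_blocks`
    memory blocks, for an access trace under a given data packing."""
    cache = OrderedDict()
    misses = 0
    for obj in trace:
        block = packing[obj]
        if block in cache:
            cache.move_to_end(block)       # hit: refresh LRU order
        else:
            misses += 1
            cache[block] = None
            if len(cache) > num_blocks:
                cache.popitem(last=False)  # evict least recently used
    return misses

trace = ['x', 'y', 'x', 'y', 'z', 'x']
good = {'x': 0, 'y': 0, 'z': 1}   # co-accessed x and y share a block
bad = {'x': 0, 'y': 1, 'z': 2}
print(miss_count(trace, good, 1), miss_count(trace, bad, 1))   # 3 6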
@Article{Gimenez:2016:CI,
author = "St{\'e}phane Gimenez and Georg Moser",
title = "The complexity of interaction",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "243--255",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837646",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we analyze the complexity of functional
programs written in the interaction-net computation
model, an asynchronous, parallel and confluent model
that generalizes linear-logic proof nets. Employing
user-defined sized and scheduled types, we certify
concrete time, space and space-time complexity bounds
for both sequential and parallel reductions of
interaction-net programs by suitably assigning
complexity potentials to typed nodes. The relevance of
this approach is illustrated on archetypal programming
examples. The provided analysis is precise,
compositional and is, in theory, not restricted to
particular complexity classes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Swamy:2016:DTM,
author = "Nikhil Swamy and Catalin Hritcu and Chantal Keller and
Aseem Rastogi and Antoine Delignat-Lavaud and Simon
Forest and Karthikeyan Bhargavan and C{\'e}dric Fournet
and Pierre-Yves Strub and Markulf Kohlweiss and
Jean-Karim Zinzindohoue and Santiago
Zanella-B{\'e}guelin",
title = "Dependent types and multi-monadic effects in {F*}",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "256--270",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837655",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new, completely redesigned, version of
F*, a language that works both as a proof assistant as
well as a general-purpose, verification-oriented,
effectful programming language. In support of these
complementary roles, F* is a dependently typed,
higher-order, call-by-value language with {\em primitive}
effects including state, exceptions, divergence and IO.
Although primitive, programmers choose the granularity
at which to specify effects by equipping each effect
with a monadic, predicate transformer semantics. F*
uses this to efficiently compute weakest preconditions
and discharges the resulting proof obligations using a
combination of SMT solving and manual proofs. Isolated
from the effects, the core of F* is a language of pure
functions used to write specifications and proof
terms---its consistency is maintained by a semantic
termination check based on a well-founded order. We
evaluate our design on more than 55,000 lines of F* we
have authored in the last year, focusing on three main
case studies. Showcasing its use as a general-purpose
programming language, F* is programmed (but not
verified) in F*, and bootstraps in both OCaml and F\#.
Our experience confirms F*'s pay-as-you-go cost model:
writing idiomatic ML-like code with no finer
specifications imposes no user burden. As a
verification-oriented language, our most significant
evaluation of F* is in verifying several key modules in
an implementation of the TLS-1.2 protocol standard. For
the modules we considered, we are able to prove more
properties, with fewer annotations using F* than in a
prior verified implementation of TLS-1.2. Finally, as a
proof assistant, we discuss our use of F* in
mechanizing the metatheory of a range of lambda
calculi, starting from the simply typed lambda calculus
to System F-omega and even micro-F*, a sizeable
fragment of F* itself---these proofs make essential use
of F*'s flexible combination of SMT automation and
constructive proofs, enabling a tactic-free style of
programming and proving at a relatively large scale.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
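The weakest-precondition computation at the heart of F*'s verification
pipeline can be illustrated on a toy imperative language. A minimal
sketch, assuming tuple-encoded terms; this shows only the classic
textbook rules for pure state, which F*'s predicate-transformer
semantics generalises to arbitrary monadic effects:

# Terms are nested tuples: ('var', 'x'), ('const', 10),
# ('+', e1, e2), ('<=', e1, e2), ...
def subst(pred, var, expr):
    """pred with every occurrence of `var` replaced by `expr`."""
    if pred[0] == 'var':
        return expr if pred[1] == var else pred
    if pred[0] == 'const':
        return pred
    return (pred[0],) + tuple(subst(p, var, expr) for p in pred[1:])

def wp(stmt, post):
    """Weakest precondition of a statement w.r.t. a postcondition."""
    if stmt[0] == 'assign':
        _, var, expr = stmt
        return subst(post, var, expr)
    if stmt[0] == 'seq':
        _, s1, s2 = stmt
        return wp(s1, wp(s2, post))
    if stmt[0] == 'if':
        _, c, s_then, s_else = stmt
        return ('and', ('implies', c, wp(s_then, post)),
                       ('implies', ('not', c), wp(s_else, post)))
    raise ValueError(stmt[0])

# wp(x := x + 1, x <= 10) is x + 1 <= 10:
prog = ('assign', 'x', ('+', ('var', 'x'), ('const', 1)))
print(wp(prog, ('<=', ('var', 'x'), ('const', 10))))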
@Article{Borgstrom:2016:FRF,
author = "Johannes Borgstr{\"o}m and Andrew D. Gordon and Long
Ouyang and Claudio Russo and Adam {\'S}cibior and
Marcin Szymczak",
title = "{Fabular}: regression formulas as probabilistic
programming",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "271--283",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837653",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Regression formulas are a domain-specific language
adopted by several R packages for describing an
important and useful class of statistical models:
hierarchical linear regressions. Formulas are succinct,
expressive, and clearly popular, so are they a useful
addition to probabilistic programming languages? And
what do they mean? We propose a core calculus of
hierarchical linear regression, in which regression
coefficients are themselves defined by nested
regressions (unlike in R). We explain how our calculus
captures the essence of the formula DSL found in R. We
describe the design and implementation of Fabular, a
version of the Tabular schema-driven probabilistic
programming language, enriched with formulas based on
our regression calculus. To the best of our knowledge,
this is the first formal description of the core ideas
of R's formula notation, the first development of a
calculus of regression formulas, and the first
demonstration of the benefits of composing regression
formulas and latent variables in a probabilistic
programming language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
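The non-hierarchical core of a regression formula such as y ~ 1 + x is
just least squares over a design matrix with an intercept column. A
minimal numpy sketch (function and data names are illustrative;
Fabular's formulas go further by letting coefficients themselves be
defined by nested regressions):

import numpy as np

def fit_formula(data, response, predictors):
    """Fit `response ~ 1 + predictors` by ordinary least squares."""
    X = np.column_stack([np.ones(len(data[response]))] +
                        [data[p] for p in predictors])
    y = np.asarray(data[response])
    coef, *_ = np.linalg.lstsq(X, y, rcond=None)
    return dict(zip(['(Intercept)'] + predictors, coef))

data = {'x': [0.0, 1.0, 2.0, 3.0], 'y': [1.1, 2.9, 5.2, 7.1]}
print(fit_formula(data, 'y', ['x']))   # intercept near 1, slope near 2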
@Article{Grathwohl:2016:KCN,
author = "Bj{\o}rn Bugge Grathwohl and Fritz Henglein and Ulrik
Terp Rasmussen and Kristoffer Aalund S{\o}holm and
Sebastian Paaske T{\o}rholm",
title = "{Kleenex}: compiling nondeterministic transducers to
deterministic streaming transducers",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "284--297",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837647",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present and illustrate Kleenex, a language for
expressing general nondeterministic finite transducers,
and its novel compilation to streaming string
transducers with essentially optimal streaming
behavior, worst-case linear-time performance and
sustained high throughput. Its underlying theory is
based on transducer decomposition into oracle and
action machines: the oracle machine performs streaming
greedy disambiguation of the input; the action machine
performs the output actions. In use cases Kleenex
achieves consistently high throughput rates around 1
Gbps on stock hardware. It performs well, especially in
complex use cases, in comparison to both specialized
and related tools such as GNU awk, GNU sed, GNU grep,
RE2, Ragel, and regular-expression libraries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
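The target form of Kleenex's compilation, a deterministic machine that
consumes its input in one pass with bounded state and emits output as
soon as it is determined, can be illustrated by a hand-written
example. A sketch (this is not Kleenex's oracle/action decomposition,
just the streaming shape of its output machines):

def stream_rle(chunks):
    """Run-length encode a stream, emitting each run as soon as it
    ends; one register (the current run) suffices."""
    char, count = None, 0
    for chunk in chunks:
        for c in chunk:
            if c == char:
                count += 1
            else:
                if char is not None:
                    yield f'{char}{count}'
                char, count = c, 1
    if char is not None:
        yield f'{char}{count}'

print(''.join(stream_rle(['aaab', 'bbc'])))   # a3b3c1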
@Article{Long:2016:APG,
author = "Fan Long and Martin Rinard",
title = "Automatic patch generation by learning correct code",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "298--312",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837617",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Prophet, a novel patch generation system
that works with a set of successful human patches
obtained from open-source software repositories to
learn a probabilistic, application-independent model of
correct code. It generates a space of candidate
patches, uses the model to rank the candidate patches
in order of likely correctness, and validates the
ranked patches against a suite of test cases to find
correct patches. Experimental results show that, on a
benchmark set of 69 real-world defects drawn from eight
open-source projects, Prophet significantly outperforms
the previous state-of-the-art patch generation
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Katz:2016:ETB,
author = "Omer Katz and Ran El-Yaniv and Eran Yahav",
title = "Estimating types in binaries using predictive
modeling",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "313--326",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837674",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reverse engineering is an important tool in mitigating
vulnerabilities in binaries. As a lot of software is
developed in object-oriented languages, reverse
engineering of object-oriented code is of critical
importance. One of the major hurdles in reverse
engineering binaries compiled from object-oriented code
is the use of dynamic dispatch. In the absence of debug
information, any dynamic dispatch may seem to jump to
many possible targets, posing a significant challenge
to a reverse engineer trying to track the program flow.
We present a novel technique that allows us to
statically determine the likely targets of virtual
function calls. Our technique uses object tracelets ---
statically constructed sequences of operations
performed on an object --- to capture potential runtime
behaviors of the object. Our analysis automatically
pre-labels some of the object tracelets by relying on
instances where the type of an object is known. The
resulting type-labeled tracelets are then used to train
a statistical language model (SLM) for each type. We
then use the resulting ensemble of SLMs over unlabeled
tracelets to generate a ranking of their most likely
types, from which we deduce the likely targets of
dynamic dispatches. We have implemented our technique
and evaluated it over real-world C++ binaries. Our
evaluation shows that when there are multiple
alternative targets, our approach can drastically
reduce the number of targets that have to be considered
by a reverse engineer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
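The ranking step described above can be approximated with the simplest
possible statistical language model: bigram counts with add-one
smoothing over operation sequences. A toy Python sketch under that
assumption (tracelets, types, and scores here are invented for
illustration):

from collections import Counter

def bigrams(tracelet):
    return list(zip(tracelet, tracelet[1:]))

def train(labelled):
    """Per-type bigram counts over type-labelled tracelets."""
    models = {}
    for typ, tracelet in labelled:
        models.setdefault(typ, Counter()).update(bigrams(tracelet))
    return models

def rank_types(models, tracelet):
    """Rank candidate types by smoothed bigram likelihood."""
    scores = {}
    for typ, counts in models.items():
        total = sum(counts.values())
        score = 1.0
        for bg in bigrams(tracelet):
            score *= (counts[bg] + 1) / (total + len(counts) + 1)
        scores[typ] = score
    return sorted(scores, key=scores.get, reverse=True)

labelled = [('List', ['ctor', 'push', 'push', 'size']),
            ('File', ['ctor', 'open', 'read', 'close'])]
print(rank_types(train(labelled), ['ctor', 'push', 'size']))
# ['List', 'File']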
@Article{Chatterjee:2016:AAQ,
author = "Krishnendu Chatterjee and Hongfei Fu and Petr
Novotn{\'y} and Rouzbeh Hasheminezhad",
title = "Algorithmic analysis of qualitative and quantitative
termination problems for affine probabilistic
programs",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "327--342",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837639",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we consider termination of
probabilistic programs with real-valued variables. The
questions concerned are: 1. qualitative ones that ask
(i) whether the program terminates with probability 1
(almost-sure termination) and (ii) whether the expected
termination time is finite (finite termination); 2.
quantitative ones that ask (i) to approximate the
expected termination time (expectation problem) and
(ii) to compute a bound B such that the probability to
terminate after B steps decreases exponentially
(concentration problem). To solve these questions, we
utilize the notion of ranking supermartingales which is
a powerful approach for proving termination of
probabilistic programs. In detail, we focus on
algorithmic synthesis of linear
ranking-supermartingales over affine probabilistic
programs (APP's) with both angelic and demonic
non-determinism. An important subclass of APP's is
LRAPP which is defined as the class of all APP's over
which a linear ranking-supermartingale exists. Our main
contributions are as follows. Firstly, we show that the
membership problem of LRAPP (i) can be decided in
polynomial time for APP's with at most demonic
non-determinism, and (ii) is NP-hard and in PSPACE for
APP's with angelic non-determinism; moreover, the
NP-hardness result holds already for APP's without
probability and demonic non-determinism. Secondly, we
show that the concentration problem over LRAPP can be
solved in the same complexity as for the membership
problem of LRAPP. Finally, we show that the expectation
problem over LRAPP can be solved in 2EXPTIME and is
PSPACE-hard even for APP's without probability and
non-determinism (i.e., deterministic programs). Our
experimental results demonstrate the effectiveness of
our approach to answer the qualitative and quantitative
questions over APP's with at most demonic
non-determinism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
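The drift condition that makes a function a ranking supermartingale is
easy to state and check on a toy program: the expected value of the
function after one loop iteration must decrease by at least a fixed
epsilon. A sketch checking this on sampled states for a biased random
walk (the paper synthesises such functions symbolically rather than
testing them pointwise):

from fractions import Fraction

def check_linear_rsm(coeff, const, step_dist, eps, states):
    """Check E[f(x')] <= f(x) - eps for f(x) = coeff*x + const and a
    finite distribution of additive updates, on the given states."""
    f = lambda x: coeff * x + const
    for x in states:
        expected_next = sum(p * f(x + d) for d, p in step_dist.items())
        if expected_next > f(x) - eps:
            return False
    return True

# while x > 0: x := x - 1 with prob. 3/4, else x := x + 1
step = {-1: Fraction(3, 4), +1: Fraction(1, 4)}
print(check_linear_rsm(1, 0, step, Fraction(1, 2), range(1, 1000)))
# True: f(x) = x decreases by 1/2 in expectation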
@Article{Singh:2016:TSD,
author = "Rishabh Singh and Sumit Gulwani",
title = "Transforming spreadsheet data types using examples",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "343--356",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837668",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cleaning spreadsheet data types is a common problem
faced by millions of spreadsheet users. Data types such
as date, time, name, and units are ubiquitous in
spreadsheets, and cleaning transformations on these
data types involve parsing and pretty printing their
string representations. This presents many challenges
to users because cleaning such data requires some
background knowledge about the data itself and moreover
this data is typically non-uniform, unstructured, and
ambiguous. Spreadsheet systems and programming
languages provide some UI-based and programmatic
solutions for this problem, but they are either
insufficient for users' needs or beyond their
expertise. In this paper, we present a programming by
example methodology of cleaning data types that learns
the desired transformation from a few input-output
examples. We propose a domain specific language with
probabilistic semantics that is parameterized with
declarative data type definitions. The probabilistic
semantics is based on three key aspects: (i)
approximate predicate matching, (ii) joint learning of
data type interpretation, and (iii) weighted branches.
This probabilistic semantics enables the language to
handle non-uniform, unstructured, and ambiguous data.
We then present a synthesis algorithm that learns the
desired program in this language from a set of
input-output examples. We have implemented our
algorithm as an Excel add-in and present its successful
evaluation on 55 benchmark problems obtained from
online help forums and Excel product team.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Lesani:2016:CCC,
author = "Mohsen Lesani and Christian J. Bell and Adam
Chlipala",
title = "{Chapar}: certified causally consistent distributed
key-value stores",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "357--370",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837622",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today's Internet services are often expected to stay
available and remain highly responsive even in the
face of site crashes and network partitions.
Theoretical results state that causal consistency is
one of the strongest consistency guarantees that is
possible under these requirements, and many practical
systems provide causally consistent key-value stores.
In this paper, we present a framework called Chapar for
modular verification of causal consistency for
replicated key-value store implementations and their
client programs. Specifically, we formulate separate
correctness conditions for key-value store
implementations and for their clients. The interface
between the two is a novel operational semantics for
causal consistency. We have verified the causal
consistency of two key-value store implementations from
the literature using a novel proof technique. We have
also implemented a simple automatic model checker for
the correctness of client programs. The two
independently verified results for the implementations
and clients can be composed to conclude the correctness
of any of the programs when executed with any of the
implementations. We have developed and checked our
framework in Coq, extracted it to OCaml, and built
executable stores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Gotsman:2016:CIS,
author = "Alexey Gotsman and Hongseok Yang and Carla Ferreira
and Mahsa Najafzadeh and Marc Shapiro",
title = "'{Cause} {I}'m strong enough': Reasoning about
consistency choices in distributed systems",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "371--384",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837625",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large-scale distributed systems often rely on
replicated databases that allow a programmer to request
different data consistency guarantees for different
operations, and thereby control their performance.
Using such databases is far from trivial: requesting
stronger consistency in too many places may hurt
performance, and requesting it in too few places may
violate correctness. To help programmers in this task,
we propose the first proof rule for establishing that a
particular choice of consistency guarantees for various
operations on a replicated database is enough to ensure
the preservation of a given data integrity invariant.
Our rule is modular: it allows reasoning about the
behaviour of every operation separately under some
assumption on the behaviour of other operations. This
leads to simple reasoning, which we have automated in
an SMT-based tool. We present a nontrivial proof of
soundness of our rule and illustrate its use on several
examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
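The shape of the proof obligations such a tool discharges can be shown
on one operation and one invariant. A minimal sketch using the z3
Python bindings (assuming the z3-solver package is installed; this
checks only the sequential obligation that one operation's effect
preserves the invariant, not the paper's full rule about concurrently
delivered effects):

from z3 import And, Implies, Int, Not, Solver, unsat

bal, amt = Int('bal'), Int('amt')
invariant = bal >= 0
guard = And(amt >= 0, bal >= amt)   # precondition of withdraw(amt)
post_bal = bal - amt                # its effect on the balance

# The obligation inv /\ guard => inv-after-effect is valid iff its
# negation is unsatisfiable.
s = Solver()
s.add(Not(Implies(And(invariant, guard), post_bal >= 0)))
print('preserved' if s.check() == unsat else 'violated')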
@Article{Liang:2016:PLC,
author = "Hongjin Liang and Xinyu Feng",
title = "A program logic for concurrent objects under fair
scheduling",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "385--399",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837635",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Existing work on verifying concurrent objects is
mostly concerned with safety only, e.g., partial
correctness or linearizability. Although there has been
recent work verifying lock-freedom of non-blocking
objects, much less effort has been devoted to
deadlock-freedom and starvation-freedom, the progress
properties of blocking objects. These properties are
more challenging to verify than lock-freedom because
they allow the progress of one thread to depend on the
progress of another, assuming fair scheduling. We
propose LiLi, a new rely-guarantee style program logic
for verifying linearizability and progress together for
concurrent objects under fair scheduling. The
rely-guarantee style logic unifies thread-modular
reasoning about both starvation-freedom and
deadlock-freedom in one framework. It also establishes
progress-aware abstraction for concurrent objects,
which can be applied when verifying safety and liveness
of client code. We have successfully applied the logic
to verify starvation-freedom or deadlock-freedom of
representative algorithms such as ticket locks, queue
locks, lock-coupling lists, optimistic lists and lazy
lists.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Dragoi:2016:PPS,
author = "Cezara Dragoi and Thomas A. Henzinger and Damien
Zufferey",
title = "{PSync}: a partially synchronous language for
fault-tolerant distributed algorithms",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "400--415",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837650",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Fault-tolerant distributed algorithms play an
important role in many critical/high-availability
applications. These algorithms are notoriously
difficult to implement correctly, due to asynchronous
communication and the occurrence of faults, such as the
network dropping messages or computers crashing. We
introduce PSync, a domain specific language based on
the Heard-Of model, which views asynchronous faulty
systems as synchronous ones with an adversarial
environment that simulates asynchrony and faults by
dropping messages. We define a runtime system for PSync
that efficiently executes on asynchronous networks. We
formalise the relation between the runtime system and
PSync in terms of observational refinement. The
high-level lockstep abstraction introduced by PSync
simplifies the design and implementation of
fault-tolerant distributed algorithms and enables
automated formal verification. We have implemented an
embedding of PSync in the Scala programming language
with a runtime system for partially synchronous
networks. We show the applicability of PSync by
implementing several important fault-tolerant
distributed algorithms and we compare the
implementation of consensus algorithms in PSync against
implementations in other languages in terms of code
size, runtime efficiency, and verification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Chen:2016:PTI,
author = "Sheng Chen and Martin Erwig",
title = "Principal type inference for {GADTs}",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "416--428",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837665",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new method for GADT type inference that
improves the precision of previous approaches. In
particular, our approach accepts more type-correct
programs than previous approaches when they do not
employ type annotations. A side benefit of our approach
is that it can detect a wide range of runtime errors
that are missed by previous approaches. Our method is
based on the idea of representing type refinements in
pattern-matching branches by choice types, which
facilitate a separation of the typing and
reconciliation phases and thus support case
expressions. This idea is formalized in a type system,
which is both sound and a conservative extension of the
classical Hindley--Milner system. We present the
results of an empirical evaluation that compares our
algorithm with previous approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Garcia:2016:AGT,
author = "Ronald Garcia and Alison M. Clark and {\'E}ric
Tanter",
title = "Abstracting gradual typing",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "429--442",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837670",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Language researchers and designers have extended a
wide variety of type systems to support gradual typing,
which enables languages to seamlessly combine dynamic
and static checking. These efforts consistently
demonstrate that designing a satisfactory gradual
counterpart to a static type system is challenging, and
this challenge only increases with the sophistication
of the type system. Gradual type system designers need
more formal tools to help them conceptualize,
structure, and evaluate their designs. In this paper,
we propose a new formal foundation for gradual typing,
drawing on principles from abstract interpretation to
give gradual types a semantics in terms of pre-existing
static types. Abstracting Gradual Typing (AGT for
short) yields a formal account of consistency---one of
the cornerstones of the gradual typing approach---that
subsumes existing notions of consistency, which were
developed through intuition and ad hoc reasoning. Given
a syntax-directed static typing judgment, the AGT
approach induces a corresponding gradual typing
judgment. Then the type safety proof for the underlying
static discipline induces a dynamic semantics for
gradual programs defined over source-language typing
derivations. The AGT approach does not resort to an
externally justified cast calculus: instead, run-time
checks naturally arise by deducing evidence for
consistent judgments during proof reduction. To
illustrate the approach, we develop a novel
gradually-typed counterpart for a language with record
subtyping. Gradual languages designed with the AGT
approach satisfy by construction the refined criteria
for gradual typing set forth by Siek and colleagues.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
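The consistency relation that AGT derives, rather than postulates, is
small enough to state directly: the unknown type is consistent with
everything, base types only with themselves, and function types
componentwise. A Python sketch of that classic relation, for
comparison with the derived one (the encoding is illustrative):

# Types: 'Int', 'Bool', '?' (the unknown type), or ('->', dom, cod).
def consistent(s, t):
    """The classic consistency relation of gradual typing."""
    if s == '?' or t == '?':
        return True
    if isinstance(s, tuple) and isinstance(t, tuple):
        return consistent(s[1], t[1]) and consistent(s[2], t[2])
    return s == t

print(consistent(('->', '?', 'Int'), ('->', 'Bool', 'Int')))   # True
print(consistent('Int', 'Bool'))                               # False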
@Article{Cimini:2016:GMA,
author = "Matteo Cimini and Jeremy G. Siek",
title = "The gradualizer: a methodology and algorithm for
generating gradual type systems",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "443--455",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837632",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many languages are beginning to integrate dynamic and
static typing. Siek and Taha offered gradual typing as
an approach to this integration that provides a
coherent and full-span migration between the two
disciplines. However, the literature lacks a general
methodology for designing gradually typed languages.
Our first contribution is to provide a methodology for
deriving the gradual type system and the compilation to
the cast calculus. Based on this methodology, we
present the Gradualizer, an algorithm that generates a
gradual type system from a well-formed type system and
also generates a compiler to the cast calculus. Our
algorithm handles a large class of type systems and
generates systems that are correct with respect to the
formal criteria of gradual typing. We also report on an
implementation of the Gradualizer that takes a type
system expressed in lambda-prolog and outputs its
gradually typed version and a compiler to the cast
calculus in lambda-prolog.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Takikawa:2016:SGT,
author = "Asumu Takikawa and Daniel Feltey and Ben Greenman and
Max S. New and Jan Vitek and Matthias Felleisen",
title = "Is sound gradual typing dead?",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "456--468",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837630",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmers have come to embrace dynamically-typed
languages for prototyping and delivering large and
complex systems. When it comes to maintaining and
evolving these systems, the lack of explicit static
typing becomes a bottleneck. In response, researchers
have explored the idea of gradually-typed programming
languages which allow the incremental addition of type
annotations to software written in one of these untyped
languages. Some of these new, hybrid languages insert
run-time checks at the boundary between typed and
untyped code to establish type soundness for the
overall system. With sound gradual typing, programmers
can rely on the language implementation to provide
meaningful error messages when type invariants are
violated. While most research on sound gradual typing
remains theoretical, the few emerging implementations
suffer from performance overheads due to these checks.
None of the publications on this topic comes with a
comprehensive performance evaluation. Worse, a few
report disastrous numbers. In response, this paper
proposes a method for evaluating the performance of
gradually-typed programming languages. The method
hinges on exploring the space of partial conversions
from untyped to typed. For each benchmark, the
performance of the different versions is reported in a
synthetic metric that associates runtime overhead to
conversion effort. The paper reports on the results of
applying the method to Typed Racket, a mature
implementation of sound gradual typing, using a suite
of real-world programs of various sizes and
complexities. Based on these results the paper
concludes that, given the current state of
implementation technologies, sound gradual typing faces
significant challenges. Conversely, it raises the
question of how implementations could reduce the
overheads associated with soundness and how tools could
be used to steer programmers clear from pathological
cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
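The evaluation method hinges on the lattice of partial conversions:
with n modules there are 2^n typed/untyped configurations, each with
its own overhead relative to the fully untyped baseline. A synthetic
Python sketch of the bookkeeping (the per-module overhead model below
is invented; the paper measures real Typed Racket running times):

from itertools import product
from statistics import mean

def evaluate_lattice(module_overheads, budget):
    """Fraction of the 2^n configurations whose modelled slowdown
    stays within `budget` times the untyped baseline."""
    slowdowns = []
    for config in product([False, True], repeat=len(module_overheads)):
        cost = 1.0
        for typed, ovh in zip(config, module_overheads):
            if typed:
                cost *= ovh
        slowdowns.append(cost)
    ok = sum(1 for s in slowdowns if s <= budget)
    return ok / len(slowdowns), mean(slowdowns)

print(evaluate_lattice([1.1, 3.0, 1.4], budget=2.0))
# (0.5, ...): half the configurations stay within a 2x budget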
@Article{Octeau:2016:CSA,
author = "Damien Octeau and Somesh Jha and Matthew Dering and
Patrick McDaniel and Alexandre Bartel and Li Li and
Jacques Klein and Yves {Le Traon}",
title = "Combining static analysis with probabilistic models to
enable market-scale {Android} inter-component
analysis",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "469--484",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837661",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Static analysis has been successfully used in many
areas, from verifying mission-critical software to
malware detection. Unfortunately, static analysis often
produces false positives, which require significant
manual effort to resolve. In this paper, we show how to
overlay a probabilistic model, trained using domain
knowledge, on top of static analysis results, in order
to triage static analysis results. We apply this idea
to analyzing mobile applications. Android application
components can communicate with each other, both within
single applications and between different applications.
Unfortunately, techniques to statically infer
Inter-Component Communication (ICC) yield many
potential inter-component and inter-application links,
most of which are false positives. At large scales,
scrutinizing all potential links is simply not
feasible. We therefore overlay a probabilistic model of
ICC on top of static analysis results. Since computing
the inter-component links is a prerequisite to
inter-component analysis, we introduce a formalism for
inferring ICC links based on set constraints. We design
an efficient algorithm for performing link resolution.
We compute all potential links in a corpus of 11,267
applications in 30 minutes and triage them using our
probabilistic approach. We find that over 95.1\% of all
636 million potential links are associated with
probability values below 0.01 and are thus likely
infeasible. It is therefore possible to consider only
a small subset of all links without significant loss of
information. This work is the first significant step in
making static inter-application analysis more
tractable, even at large scales.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Grigore:2016:ARG,
author = "Radu Grigore and Hongseok Yang",
title = "Abstraction refinement guided by a learnt
probabilistic model",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "485--498",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837663",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The core challenge in designing an effective static
program analysis is to find a good program abstraction
--- one that retains only details relevant to a given
query. In this paper, we present a new approach for
automatically finding such an abstraction. Our approach
uses a pessimistic strategy, which can optionally use
guidance from a probabilistic model. Our approach
applies to parametric static analyses implemented in
Datalog, and is based on counterexample-guided
abstraction refinement. For each untried abstraction,
our probabilistic model provides a probability of
success, while the size of the abstraction provides an
estimate of its cost in terms of analysis time.
Combining these two metrics, probability and cost, our
refinement algorithm picks an optimal abstraction. Our
probabilistic model is a variant of the Erd{\H{o}}s--R{\'e}nyi
random graph model, and it is tunable by what we call
hyperparameters. We present a method to learn good
values for these hyperparameters, by observing past
runs of the analysis on an existing codebase. We
evaluate our approach on an object sensitive pointer
analysis for Java programs, with two client analyses
(PolySite and Downcast).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Garg:2016:LIU,
author = "Pranav Garg and Daniel Neider and P. Madhusudan and
Dan Roth",
title = "Learning invariants using decision trees and
implication counterexamples",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "499--512",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837664",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Inductive invariants can be robustly synthesized using
a learning model where the teacher is a program
verifier who instructs the learner through concrete
program configurations, classified as positive,
negative, and implications. We propose the first
learning algorithms in this model with implication
counterexamples that are based on machine learning
techniques. In particular, we extend classical
decision-tree learning algorithms in machine learning
to handle implication samples, building new scalable
ways to construct small decision trees using
statistical measures. We also develop a decision-tree
learning algorithm in this model that is guaranteed to
converge to the right concept (invariant) if one
exists. We implement the learners and an appropriate
teacher, and show that the resulting invariant
synthesis is efficient and convergent for a large suite
of programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
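The sample format is the distinctive part of this learning model:
besides positive and negative program configurations, the teacher may
return implication pairs (s, t), meaning that if the invariant holds
at s it must also hold at t. A toy sketch that only filters a finite
candidate pool (the paper instead grows decision trees over such
samples):

def ice_learn(pos, neg, impl, candidates):
    """Return a candidate consistent with ICE samples: all positives
    satisfy it, no negative does, and every implication (s, t) with
    s inside the candidate also has t inside it."""
    for inv in candidates:
        if all(inv(s) for s in pos) and \
           not any(inv(s) for s in neg) and \
           all(inv(t) for (s, t) in impl if inv(s)):
            return inv
    return None

# States are integers; candidates are threshold predicates x <= c.
candidates = [lambda x, c=c: x <= c for c in range(10)]
inv = ice_learn(pos=[0, 3], neg=[8], impl=[(3, 4)],
                candidates=candidates)
print([x for x in range(10) if inv(x)])   # [0, 1, 2, 3, 4]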
@Article{Emmi:2016:SAD,
author = "Michael Emmi and Constantin Enea",
title = "Symbolic abstract data type inference",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "513--525",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837645",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Formal specification is a vital ingredient to scalable
verification of software systems. In the case of
efficient implementations of concurrent objects like
atomic registers, queues, and locks, symbolic formal
representations of their abstract data types (ADTs)
enable efficient modular reasoning, decoupling clients
from implementations. Writing adequate formal
specifications, however, is a complex task requiring
rare expertise. In practice, programmers write
reference implementations as informal specifications.
In this work we demonstrate that effective symbolic ADT
representations can be automatically generated from the
executions of reference implementations. Our approach
exploits two key features of naturally-occurring ADTs:
violations can be decomposed into a small set of
representative patterns, and these patterns manifest in
executions with few operations. By identifying certain
algebraic properties of naturally-occurring ADTs, and
exhaustively sampling executions up to a small number
of operations, we generate concise symbolic ADT
representations which are complete in practice,
enabling the application of efficient symbolic
verification algorithms without the burden of manual
specification. Furthermore, the concise ADT violation
patterns we generate are human-readable, and can serve
as useful, formal documentation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Bhaskaracharya:2016:SIA,
author = "Somashekaracharya G. Bhaskaracharya and Uday
Bondhugula and Albert Cohen",
title = "{SMO}: an integrated approach to intra-array and
inter-array storage optimization",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "526--538",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837636",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The polyhedral model provides an expressive
intermediate representation that is convenient for the
analysis and subsequent transformation of affine loop
nests. Several heuristics exist for achieving complex
program transformations in this model. However, there
is also considerable scope to utilize this model to
tackle the problem of automatic memory footprint
optimization. In this paper, we present a new automatic
storage optimization technique which can be used to
achieve both intra-array as well as inter-array storage
reuse with a pre-determined schedule for the
computation. Our approach works by finding
statement-wise storage partitioning hyperplanes that
partition a unified global array space so that values
with overlapping live ranges are not mapped to the same
partition. Our heuristic is driven by a fourfold
objective function which not only minimizes the
dimensionality and storage requirements of arrays
required for each high-level statement, but also
maximizes inter-statement storage reuse. The storage
mappings obtained using our heuristic can be
asymptotically better than those obtained by any
existing technique. We implement our technique and
demonstrate its practical impact by evaluating its
effectiveness on several benchmarks chosen from the
domains of image processing, stencil computations, and
high-performance computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Bao:2016:PDV,
author = "Wenlei Bao and Sriram Krishnamoorthy and
Louis-No{\"e}l Pouchet and Fabrice Rastello and P.
Sadayappan",
title = "{PolyCheck}: dynamic verification of iteration space
transformations on affine programs",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "539--554",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837656",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "High-level compiler transformations, especially loop
transformations, are widely recognized as critical
optimizations to restructure programs to improve data
locality and expose parallelism. Guaranteeing the
correctness of program transformations is essential,
and to date three main approaches have been developed:
proof of equivalence of affine programs, matching the
execution traces of programs, and checking bit-by-bit
equivalence of program outputs. Each technique suffers
from limitations in the kind of transformations
supported, space complexity, or the sensitivity to the
testing dataset. In this paper, we take a novel
approach that addresses all three limitations to
provide an automatic bug checker to verify any
iteration reordering transformations on affine
programs, including non-affine transformations, with
space consumption proportional to the original program
data and robust to arbitrary datasets of a given size.
We achieve this by exploiting the structure of affine
program control- and data-flow to generate, at
compile time, lightweight checker code to be executed
within the transformed program. Experimental results
assess the correctness and effectiveness of our method
and its increased coverage over previous approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Andrysco:2016:PFP,
author = "Marc Andrysco and Ranjit Jhala and Sorin Lerner",
title = "Printing floating-point numbers: a faster, always
correct method",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "555--567",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837654",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Floating-point numbers are an essential part of modern
software, recently gaining particular prominence on the
web as the exclusive numeric format of JavaScript. To
use floating-point numbers, we require a way to convert
binary machine representations into human readable
decimal outputs. Existing conversion algorithms make
trade-offs between completeness and performance. The
classic Dragon4 algorithm by Steele and White and its
later refinements achieve completeness --- i.e. produce
correct and optimal outputs on all inputs --- by using
arbitrary precision integer (bignum) arithmetic which
leads to a high performance cost. On the other hand,
the recent Grisu3 algorithm by Loitsch shows how to
recover performance by using native integer arithmetic
but sacrifices optimality for 0.5\% of all inputs. We
present Errol, a new complete algorithm that is
guaranteed to produce correct and optimal results for
all inputs while simultaneously being 2x faster than
the incomplete Grisu3 and 4x faster than previous
complete methods.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
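
The correctness and optimality criteria invoked above have a compact
executable reading: an output is correct if it parses back to the
exact same bits, and optimal if it is the shortest such string. A
minimal Python sketch of that round-trip property (not from the
paper; it leans on CPython's repr, which has produced shortest
round-tripping output since Python 3.1):

    import struct

    def bits(x: float) -> int:
        """Raw IEEE-754 bit pattern of a double."""
        return struct.unpack("<Q", struct.pack("<d", x))[0]

    def shortest_roundtrip(x: float) -> str:
        """Shortest decimal string that parses back to exactly x."""
        s = repr(x)                    # CPython guarantees shortest round-trip
        assert bits(float(s)) == bits(x), "conversion lost information"
        return s

    # 0.1 has no exact binary representation, yet "0.1" recovers its bits.
    print(shortest_roundtrip(0.1))     # -> 0.1
    print(format(0.1, ".17g"))         # -> 0.10000000000000001 (longer, also correct)
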
@Article{Orchard:2016:ESS,
author = "Dominic Orchard and Nobuko Yoshida",
title = "Effects as sessions, sessions as effects",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "568--581",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837634",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Effect and session type systems are two expressive
behavioural type systems. The former is usually
developed in the context of the lambda-calculus and its
variants, the latter for the pi-calculus. In this paper
we explore their relative expressive power. Firstly, we
give an embedding from PCF, augmented with a
parameterised effect system, into a session-typed
pi-calculus (session calculus), showing that session
types are powerful enough to express effects. Secondly,
we give a reverse embedding, from the session calculus
back into PCF, by instantiating PCF with concurrency
primitives and its effect system with a session-like
effect algebra; effect systems are powerful enough to
express sessions. The embedding of session types into
an effect system is leveraged to give a new
implementation of session types in Haskell, via an
effect system encoding. The correctness of this
implementation follows from the second embedding
result. We also discuss various extensions to our
embeddings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Jia:2016:MBA,
author = "Limin Jia and Hannah Gommerstadt and Frank Pfenning",
title = "Monitors and blame assignment for higher-order session
types",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "582--594",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837662",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Session types provide a means to prescribe the
communication behavior between concurrent
message-passing processes. However, in a distributed
setting, some processes may be written in languages
that do not support static typing of sessions or may be
compromised by a malicious intruder, violating
invariants of the session types. In such a setting,
dynamically monitoring communication between processes
becomes a necessity for identifying undesirable
actions. In this paper, we show how to dynamically
monitor communication to enforce adherence to session
types in a higher-order setting. We present a system of
blame assignment in the case when the monitor detects
an undesirable action and an alarm is raised. We prove
that dynamic monitoring does not change system behavior
for well-typed processes, and that one of an indicated
set of possible culprits must have been compromised in
case of an alarm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Sangiorgi:2016:EBP,
author = "Davide Sangiorgi and Valeria Vignudelli",
title = "Environmental bisimulations for probabilistic
higher-order languages",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "595--607",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837651",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Environmental bisimulations for probabilistic
higher-order languages are studied. In contrast with
applicative bisimulations, environmental bisimulations
are known to be more robust and do not require
sophisticated techniques such as Howe's in the proofs
of congruence. As representative calculi, call-by-name
and call-by-value \lambda-calculus, and a
(call-by-value) \lambda-calculus extended with
references (i.e., a store) are considered. In each case
full abstraction results are derived for probabilistic
environmental similarity and bisimilarity with respect
to contextual preorder and contextual equivalence,
respectively. Some possible enhancements of the
(bi)simulations, as `up-to techniques', are also
presented. Probabilities force a number of
modifications to the definition of environmental
bisimulations in non-probabilistic languages. Some of
these modifications are specific to probabilities,
others may be seen as general refinements of
environmental bisimulations, applicable also to
non-probabilistic languages. Several examples are
presented, to illustrate the modifications and the
differences.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Flur:2016:MAA,
author = "Shaked Flur and Kathryn E. Gray and Christopher Pulte
and Susmit Sarkar and Ali Sezgin and Luc Maranget and
Will Deacon and Peter Sewell",
title = "Modelling the {ARMv8} architecture, operationally:
concurrency and {ISA}",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "608--621",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837615",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper we develop semantics for key aspects of
the ARMv8 multiprocessor architecture: the concurrency
model and much of the 64-bit application-level
instruction set (ISA). Our goal is to clarify what the
range of architecturally allowable behaviour is, and
thereby to support future work on formal verification,
analysis, and testing of concurrent ARM software and
hardware. Establishing such models with high confidence
is intrinsically difficult: it involves capturing the
vendor's architectural intent, aspects of which
(especially for concurrency) have not previously been
precisely defined. We therefore first develop a
concurrency model with a microarchitectural flavour,
abstracting from many hardware implementation concerns
but still close to hardware-designer intuition. This
means it can be discussed in detail with ARM
architects. We then develop a more abstract model,
better suited for use as an architectural
specification, which we prove sound w.r.t.~the first.
The instruction semantics involves further
difficulties, handling the mass of detail and the
subtle intensional information required to interface to
the concurrency model. We have a novel ISA description
language, with a lightweight dependent type system,
letting us do both with a rather direct representation
of the ARM reference manual instruction descriptions.
We build a tool from the combined semantics that lets
one explore, either interactively or exhaustively, the
full range of architecturally allowed behaviour, for
litmus tests and (small) ELF executables. We prove
correctness of some optimisations needed for tool
performance. We validate the models by discussion with
ARM staff, and by comparison against ARM hardware
behaviour, for ISA single-instruction tests and
concurrent litmus tests.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Pichon-Pharabod:2016:CSR,
author = "Jean Pichon-Pharabod and Peter Sewell",
title = "A concurrency semantics for relaxed atomics that
permits optimisation and avoids thin-air executions",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "622--633",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837616",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite much research on concurrent programming
languages, especially for Java and C/C++, we still do
not have a satisfactory definition of their semantics,
one that admits all common optimisations without also
admitting undesired behaviour. Especially problematic
are the ``thin-air'' examples involving
high-performance concurrent accesses, such as C/C++11
relaxed atomics. The C/C++11 model is in a
per-candidate-execution style, and previous work has
identified a tension between that and the fact that
compiler optimisations do not operate over single
candidate executions in isolation; rather, they operate
over syntactic representations that represent all
executions. In this paper we propose a novel approach
that circumvents this difficulty. We define a
concurrency semantics for a core calculus, including
relaxed-atomic and non-atomic accesses, and locks, that
admits a wide range of optimisation while still
forbidding the classic thin-air examples. It also
addresses other problems relating to undefined
behaviour. The basic idea is to use an event-structure
representation of the current state of each thread,
capturing all of its potential executions, and to
permit interleaving of execution and transformation
steps over that to reflect optimisation (possibly
dynamic) of the code. These are combined with a
non-multi-copy-atomic storage subsystem, to reflect
common hardware behaviour. The semantics is defined in
a mechanised and executable form, and designed to be
implementable above current relaxed hardware and strong
enough to support the programming idioms that C/C++11
does for this fragment. It offers a potential way
forward for concurrent programming language semantics,
beyond the current C/C++11 and Java models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Batty:2016:OSA,
author = "Mark Batty and Alastair F. Donaldson and John
Wickerson",
title = "Overhauling {SC} atomics in {C11} and {OpenCL}",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "634--648",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837637",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite the conceptual simplicity of sequential
consistency (SC), the semantics of SC atomic operations
and fences in the C11 and OpenCL memory models is
subtle, leading to convoluted prose descriptions that
translate to complex axiomatic formalisations. We
conduct an overhaul of SC atomics in C11, reducing the
associated axioms in both number and complexity. A
consequence of our simplification is that the SC
operations in an execution no longer need to be totally
ordered. This relaxation enables, for the first time,
efficient and exhaustive simulation of litmus tests
that use SC atomics. We extend our improved C11 model
to obtain the first rigorous memory model formalisation
for OpenCL (which extends C11 with support for
heterogeneous many-core programming). In the OpenCL
setting, we refine the SC axioms still further to give
a sensible semantics to SC operations that employ a
`memory scope' to restrict their visibility to specific
threads. Our overhaul requires slight strengthenings of
both the C11 and the OpenCL memory models, causing some
behaviours to become disallowed. We argue that these
strengthenings are natural, and that all of the
formalised C11 and OpenCL compilation schemes of which
we are aware (Power and x86 CPUs for C11, AMD GPUs for
OpenCL) remain valid in our revised models. Using the
HERD memory model simulator, we show that our overhaul
leads to an exponential improvement in simulation time
for C11 litmus tests compared with the original model,
making *exhaustive* simulation competitive, time-wise,
with the *non-exhaustive* CDSChecker tool.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Lahav:2016:TRA,
author = "Ori Lahav and Nick Giannarakis and Viktor Vafeiadis",
title = "Taming release-acquire consistency",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "649--662",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837643",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a strengthening of the release-acquire
fragment of the C11 memory model that (i) forbids
dubious behaviors that are not observed in any
implementation; (ii) supports fence instructions that
restore sequential consistency; and (iii) admits an
equivalent intuitive operational semantics based on
point-to-point communication. This strengthening has no
additional implementation cost: it allows the same
local optimizations as C11 release and acquire
accesses, and has exactly the same compilation schemes
to the x86-TSO and Power architectures. In fact, the
compilation to Power is complete with respect to a
recent axiomatic model of Power; that is, the compiled
program exhibits exactly the same behaviors as the
source one. Moreover, we provide criteria for placing
enough fence instructions to ensure sequential
consistency, and apply them to an efficient RCU
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Reps:2016:NPA,
author = "Thomas Reps and Emma Turetsky and Prathmesh Prabhu",
title = "{Newtonian} program analysis via tensor product",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "663--677",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837659",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recently, Esparza et al. generalized Newton's method
--- a numerical-analysis algorithm for finding roots of
real-valued functions---to a method for finding
fixed-points of systems of equations over semirings.
Their method provides a new way to solve
interprocedural dataflow-analysis problems. As in its
real-valued counterpart, each iteration of their method
solves a simpler ``linearized'' problem. One of the
reasons this advance is exciting is that some numerical
analysts have claimed that ```all' effective and fast
iterative [numerical] methods are forms (perhaps very
disguised) of Newton's method.'' However, there is an
important difference between the dataflow-analysis and
numerical-analysis contexts: when Newton's method is
used on numerical-analysis problems, multiplicative
commutativity is relied on to rearrange expressions of
the form ``c*X + X*d'' into ``(c+d) * X.'' Such
equations correspond to path problems described by
regular languages. In contrast, when Newton's method is
used for interprocedural dataflow analysis, the
``multiplication'' operation involves function
composition, and hence is non-commutative: ``c*X +
X*d'' cannot be rearranged into ``(c+d) * X.'' Such
equations correspond to path problems described by
linear context-free languages (LCFLs). In this paper,
we present an improved technique for solving the LCFL
sub-problems produced during successive rounds of
Newton's method. Our method applies to predicate
abstraction, on which most of today's software model
checkers rely.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
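
The commutativity obstacle described above is easy to observe
concretely. A small illustrative numpy sketch, with matrix product
standing in for the non-commutative ``multiplication'' (function
composition) of interprocedural dataflow analysis:

    import numpy as np

    # In a commutative semiring, c*X + X*d = (c+d)*X, so Newton steps can
    # consolidate coefficients.  With matrices the rearrangement fails.
    c = np.array([[1, 2], [0, 1]])
    d = np.array([[1, 0], [3, 1]])
    X = np.array([[0, 1], [1, 0]])

    lhs = c @ X + X @ d
    rhs = (c + d) @ X
    print(np.array_equal(lhs, rhs))    # False: the terms cannot be collapsed
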
@Article{Wu:2016:CEA,
author = "Rongxin Wu and Xiao Xiao and Shing-Chi Cheung and
Hongyu Zhang and Charles Zhang",
title = "{Casper}: an efficient approach to call trace
collection",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "678--690",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837619",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Call traces, i.e., sequences of function calls and
returns, are fundamental to a wide range of program
analyses such as bug reproduction, fault diagnosis,
performance analysis, and many others. The conventional
approach to collecting call traces, which instruments each
function call and return site, incurs large space and
time overhead. Our approach aims at reducing the
recording overheads by instrumenting only a small
amount of call sites while keeping the capability of
recovering the full trace. We propose a call trace
model and a logged call trace model based on an LL(1)
grammar, which enables us to define the criteria of a
feasible solution to call trace collection. Based on
the two models, we prove that collecting call traces
with minimal instrumentation is an NP-hard problem. We
then propose an efficient approach to obtaining a
suboptimal solution. We implemented our approach as a
tool Casper and evaluated it using the DaCapo benchmark
suite. The experimental results show that our approach
causes significantly lower runtime (and space) overhead
than two state-of-the-art approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
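
For intuition about the LL(1) trace model mentioned above: a complete
call trace is a balanced sequence of call/return events generated by
the grammar Trace -> eps | call(f) Trace ret(f) Trace, and one token
of lookahead suffices to parse it. A toy Python parser (a sketch of
the grammar only, not of Casper's reduced instrumentation):

    # Recover the dynamic call tree from a full call/return event stream.
    def parse(trace, i=0):
        """Return (list of call subtrees, next index) parsed from trace[i:]."""
        children = []
        while i < len(trace) and trace[i][0] == "call":
            _, f = trace[i]
            subtree, i = parse(trace, i + 1)
            assert trace[i] == ("ret", f), "unbalanced trace"
            children.append((f, subtree))
            i += 1
        return children, i

    events = [("call", "main"), ("call", "f"), ("ret", "f"),
              ("call", "g"), ("call", "f"), ("ret", "f"),
              ("ret", "g"), ("ret", "main")]
    print(parse(events)[0])   # [('main', [('f', []), ('g', [('f', [])])])]
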
@Article{Gilray:2016:PCF,
author = "Thomas Gilray and Steven Lyde and Michael D. Adams and
Matthew Might and David {Van Horn}",
title = "Pushdown control-flow analysis for free",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "691--704",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837631",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Traditional control-flow analysis (CFA) for
higher-order languages introduces spurious connections
between callers and callees, and different invocations
of a function may pollute each other's return flows.
Recently, three distinct approaches have been published
that provide perfect call-stack precision in a
computable manner: CFA2, PDCFA, and AAC. Unfortunately,
implementing CFA2 and PDCFA requires significant
engineering effort. Furthermore, all three are
computationally expensive. For a monovariant analysis,
CFA2 is in O(2^n), PDCFA is in O(n^6), and AAC is in
O(n^8). In this paper, we describe a new technique that
builds on these but is both straightforward to
implement and computationally inexpensive. The crucial
insight is an unusual state-dependent allocation
strategy for the addresses of continuations. Our
technique imposes only a constant-factor overhead on
the underlying analysis and costs only O(n^3) in the
monovariant case. We present the intuitions behind this
development, benchmarks demonstrating its efficacy, and
a proof of the precision of this analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Flatt:2016:BSS,
author = "Matthew Flatt",
title = "Binding as sets of scopes",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "705--717",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837620",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Our new macro expander for Racket builds on a novel
approach to hygiene. Instead of basing macro expansion
on variable renamings that are mediated by expansion
history, our new expander tracks binding through a set
of scopes that an identifier acquires from both binding
forms and macro expansions. The resulting model of
macro expansion is simpler and more uniform than one
based on renaming, and it is sufficiently compatible
with Racket's old expander to be practical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
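
The set-of-scopes model admits a very small executable sketch. In the
toy resolver below (illustrative only; bind/resolve and the ambiguity
handling are simplifications of the expander described in the paper),
a reference resolves to the binding of the same name whose scope set
is the largest subset of the reference's own scopes:

    bindings = []   # (name, frozenset of scopes, unique id)

    def bind(name, scopes, uid):
        bindings.append((name, frozenset(scopes), uid))

    def resolve(name, scopes):
        scopes = frozenset(scopes)
        candidates = [(s, uid) for (n, s, uid) in bindings
                      if n == name and s <= scopes]
        if not candidates:
            raise NameError(name)
        # biggest subset wins; the real expander rejects ambiguous cases
        return max(candidates, key=lambda c: len(c[0]))[1]

    bind("x", {1}, "x@outer")        # binder seen under scope {1}
    bind("x", {1, 2}, "x@inner")     # nested binder under scopes {1,2}
    print(resolve("x", {1, 2, 3}))   # -> x@inner (both apply; {1,2} is larger)
    print(resolve("x", {1, 3}))      # -> x@outer (only {1} is a subset)
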
@Article{Hasuo:2016:LTP,
author = "Ichiro Hasuo and Shunsuke Shimizu and Corina
C{\^\i}rstea",
title = "Lattice-theoretic progress measures and coalgebraic
model checking",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "718--732",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837673",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the context of formal verification in general and
model checking in particular, parity games serve as a
mighty vehicle: many problems are encoded as parity
games, which are then solved by the seminal algorithm
by Jurdzinski. In this paper we identify the essence of
this workflow to be the notion of progress measure, and
formalize it in general, possibly infinitary,
lattice-theoretic terms. Our view on progress measures
is that they are to nested/alternating fixed points
what invariants are to safety/greatest fixed points,
and what ranking functions are to liveness/least fixed
points. That is, progress measures are a combination of
the latter two notions (invariant and ranking function)
that have been extensively studied in the context of
(program) verification. We then apply our theory of
progress measures to a general model-checking
framework, where systems are categorically presented as
coalgebras. The framework's theoretical robustness is
witnessed by a smooth transfer from the branching-time
setting to the linear-time one. Although the framework
can be used to derive some decision procedures for
finite settings, we also expect the proposed framework
to form a basis for sound proof methods for some
undecidable/infinitary problems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
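
The analogy drawn in this abstract rests on two textbook
lattice-theoretic facts (Knaster--Tarski, for a monotone $f$ on a
complete lattice; these are standard facts, not the paper's general
definition of progress measures):

    $x \le f(x) \;\Rightarrow\; x \le \nu f$    (post-fixed points, i.e. invariants, witness greatest fixed points)
    $f(y) \le y \;\Rightarrow\; \mu f \le y$    (pre-fixed points bound least fixed points)

Ranking functions refine the second principle with well-founded
(ordinal-indexed) descent, and progress measures interleave both for
nested/alternating fixed points.
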
@Article{Chatterjee:2016:AAP,
author = "Krishnendu Chatterjee and Amir Kafshdar Goharshady and
Rasmus Ibsen-Jensen and Andreas Pavlogiannis",
title = "Algorithms for algebraic path properties in concurrent
systems of constant treewidth components",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "733--747",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837624",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study algorithmic questions for concurrent systems
where the transitions are labeled from a complete,
closed semiring, and path properties are algebraic with
semiring operations. The algebraic path properties can
model dataflow analysis problems, the shortest path
problem, and many other natural problems that arise in
program analysis. We consider that each component of
the concurrent system is a graph with constant
treewidth, a property satisfied by the control-flow
graphs of most programs. We allow for multiple possible
queries, which arise naturally in demand driven
dataflow analysis. The study of multiple queries allows
us to consider the tradeoff between the resource usage
of the one-time preprocessing and for each individual
query. The traditional approach constructs the product
graph of all components and applies the best-known
graph algorithm on the product. In this approach, even
the answer to a single query requires the transitive
closure (i.e., the results of all possible queries),
which provides no room for tradeoff between
preprocessing and query time. Our main contributions
are algorithms that significantly improve the
worst-case running time of the traditional approach,
and provide various tradeoffs depending on the number
of queries. For example, in a concurrent system of two
components, the traditional approach requires hexic
time in the worst case for answering one query as well
as computing the transitive closure, whereas we show
that with one-time preprocessing in almost cubic time,
each subsequent query can be answered in at most linear
time, and even the transitive closure can be computed
in almost quartic time. Furthermore, we establish
conditional optimality results showing that the
worst-case running time of our algorithms cannot be
improved without achieving major breakthroughs in graph
algorithms (i.e., improving the worst-case bound for
the shortest path problem in general graphs).
Preliminary experimental results show that our
algorithms perform favorably on several benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Muroya:2016:MGI,
author = "Koko Muroya and Naohiko Hoshino and Ichiro Hasuo",
title = "Memoryful geometry of interaction {II}: recursion and
adequacy",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "748--760",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837672",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A general framework of Memoryful Geometry of
Interaction (mGoI) was recently introduced by the
authors. It provides a sound translation of
lambda-terms (on the high-level) to their realizations
by stream transducers (on the low-level), where the
internal states of the latter (called memories) are
exploited for accommodating algebraic effects of
Plotkin and Power. The translation is compositional,
hence ``denotational,'' where transducers are
inductively composed using an adaptation of Barbosa's
coalgebraic component calculus. In the current paper we
extend the mGoI framework and provide a systematic
treatment of recursion---an essential feature of
programming languages that was however missing in our
previous work. Specifically, we introduce two new
fixed-point operators in the coalgebraic component
calculus. The two follow the previous work on recursion
in GoI and are called Girard style and Mackie style:
the former obviously exhibits some nice
domain-theoretic properties, while the latter allows
simpler construction. Their equivalence is established
on the categorical (or, traced monoidal) level of
abstraction, and is therefore generic with respect to
the choice of algebraic effects. Our main result is an
adequacy theorem of our mGoI translation, against
Plotkin and Power's operational semantics for algebraic
effects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Raychev:2016:LPN,
author = "Veselin Raychev and Pavol Bielik and Martin Vechev and
Andreas Krause",
title = "Learning programs from noisy data",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "761--774",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837671",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new approach for learning programs from
noisy datasets. Our approach is based on two new
concepts: a regularized program generator which
produces a candidate program based on a small sample of
the entire dataset while avoiding overfitting, and a
dataset sampler which carefully samples the dataset by
leveraging the candidate program's score on that
dataset. The two components are connected in a
continuous feedback-directed loop. We show how to apply
this approach to two settings: one where the dataset
has a bound on the noise, and another without a noise
bound. The second setting leads to a new way of
performing approximate empirical risk minimization on
hypotheses classes formed by a discrete search space.
We then present two new kinds of program synthesizers
which target the two noise settings. First, we
introduce a novel regularized bitstream synthesizer
that successfully generates programs even in the
presence of incorrect examples. We show that the
synthesizer can detect errors in the examples while
combating overfitting --- a major problem in existing
synthesis techniques. We also show how the approach can
be used in a setting where the dataset grows
dynamically via new examples (e.g., provided by a
human). Second, we present a novel technique for
constructing statistical code completion systems. These
are systems trained on massive datasets of open source
programs, also known as ``Big Code''. The key idea is
to introduce a domain specific language (DSL) over
trees and to learn functions in that DSL directly from
the dataset. These learned functions then condition the
predictions made by the system. This is a flexible and
powerful technique which generalizes several existing
works as we no longer need to decide a priori on what
the prediction should be conditioned (another benefit
is that the learned functions are a natural mechanism
for explaining the prediction). As a result, our code
completion system surpasses the prediction capabilities
of existing, hard-wired systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
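
The generator/sampler feedback loop can be caricatured in a few
lines. A toy Python sketch (the dataset, the candidate space, and the
noise bound of 3 are all invented for illustration; the paper's
synthesizers are far richer):

    import random

    random.seed(0)
    data = [(x, 2 * x) for x in range(50)]          # target program: 2*x
    for i in (7, 23, 41):
        data[i] = (data[i][0], data[i][1] + 100)    # corrupted labels

    CANDIDATES = [lambda x, a=a: a * x for a in range(5)]

    def errors(prog, sample):
        return [(x, y) for (x, y) in sample if prog(x) != y]

    def generator(sample):
        # regularized: fewest errors on the sample, simpler program on ties
        return min(CANDIDATES,
                   key=lambda p: (len(errors(p, sample)), CANDIDATES.index(p)))

    sample = random.sample(data, 5)
    for _ in range(10):                      # feedback-directed loop
        prog = generator(sample)
        bad = errors(prog, data)
        if len(bad) <= 3:                    # assumed bound on the noise
            break
        sample.append(random.choice(bad))    # sampler feeds back hard examples

    print(prog(10), len(errors(prog, data)))   # -> 20 3 (only the corrupted points)
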
@Article{Bornholt:2016:OSM,
author = "James Bornholt and Emina Torlak and Dan Grossman and
Luis Ceze",
title = "Optimizing synthesis with metasketches",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "775--788",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837666",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many advanced programming tools---for both end-users
and expert developers---rely on program synthesis to
automatically generate implementations from high-level
specifications. These tools often need to employ
tricky, custom-built synthesis algorithms because they
require synthesized programs to be not only correct,
but also optimal with respect to a desired cost metric,
such as program size. Finding these optimal solutions
efficiently requires domain-specific search strategies,
but existing synthesizers hard-code the strategy,
making them difficult to reuse. This paper presents
metasketches, a general framework for specifying and
solving optimal synthesis problems. Metasketches make
the search strategy a part of the problem definition by
specifying a fragmentation of the search space into an
ordered set of classic sketches. We provide two
cooperating search algorithms to effectively solve
metasketches. A global optimizing search coordinates
the activities of local searches, informing them of the
costs of potentially-optimal solutions as they explore
different regions of the candidate space in parallel.
The local searches execute an incremental form of
counterexample-guided inductive synthesis to
incorporate information sent from the global search. We
present Synapse, an implementation of these algorithms,
and show that it effectively solves optimal synthesis
problems with a variety of different cost functions. In
addition, metasketches can be used to accelerate
classic (non-optimal) synthesis by explicitly
controlling the search strategy, and we show that
Synapse solves classic synthesis problems that
state-of-the-art tools cannot.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
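
The defining property of a metasketch --- a cost-ordered
fragmentation of the search space that licenses early termination
with an optimality guarantee --- shows up even in a toy. A Python
sketch (the example and all names are invented; Synapse runs
cooperating global and local searches in parallel, unlike this
sequential loop):

    from itertools import product

    SPEC = lambda x: 2 * x + 1                         # black-box specification
    OPS = {"inc": lambda v: v + 1, "dbl": lambda v: 2 * v}

    def sketch(k):
        """Sketch S_k: straight-line programs of exactly k operations."""
        return product(OPS, repeat=k)

    def run(prog, x):
        v = x
        for op in prog:
            v = OPS[op](v)
        return v

    def synthesize(max_cost=6, tests=range(-3, 4)):
        for k in range(max_cost + 1):        # global search: cheapest sketch first
            for prog in sketch(k):           # local search within one sketch
                if all(run(prog, x) == SPEC(x) for x in tests):
                    return k, prog           # optimal: later sketches cost more
        return None

    print(synthesize())   # -> (2, ('dbl', 'inc')) : double, then add one
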
@Article{Albarghouthi:2016:MSS,
author = "Aws Albarghouthi and Isil Dillig and Arie Gurfinkel",
title = "Maximal specification synthesis",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "789--801",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837628",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many problems in program analysis, verification, and
synthesis require inferring specifications of unknown
procedures. Motivated by a broad range of applications,
we formulate the problem of maximal specification
inference: Given a postcondition Phi and a program P
calling a set of unknown procedures F_1,...,F_n, what
are the most permissive specifications of procedures
F_i that ensure correctness of P? In other words, we
are looking for the smallest number of assumptions we
need to make about the behaviours of F_i in order to
prove that P satisfies its postcondition. To solve
this problem, we present a novel approach that utilizes
a counterexample-guided inductive synthesis loop and
reduces the maximal specification inference problem to
multi-abduction. We formulate the novel notion of
multi-abduction as a generalization of classical
logical abduction and present an algorithm for solving
multi-abduction problems. On the practical side, we
evaluate our specification inference technique on a
range of benchmarks and demonstrate its ability to
synthesize specifications of kernel routines invoked by
device drivers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Frankle:2016:EDS,
author = "Jonathan Frankle and Peter-Michael Osera and David
Walker and Steve Zdancewic",
title = "Example-directed synthesis: a type-theoretic
interpretation",
journal = j-SIGPLAN,
volume = "51",
number = "1",
pages = "802--815",
month = jan,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2914770.2837629",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:57 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Input-output examples have emerged as a practical and
user-friendly specification mechanism for program
synthesis in many environments. While example-driven
tools have demonstrated tangible impact that has
inspired adoption in industry, their underlying
semantics are less well-understood: what are
``examples'' and how do they relate to other kinds of
specifications? This paper demonstrates that examples
can, in general, be interpreted as refinement types.
Seen in this light, program synthesis is the task of
finding an inhabitant of such a type. This insight
provides an immediate semantic interpretation for
examples. Moreover, it enables us to exploit decades of
research in type theory as well as its correspondence
with intuitionistic logic rather than designing ad hoc
theoretical frameworks for synthesis from scratch. We
put this observation into practice by formalizing
synthesis as proof search in a sequent calculus with
intersection and union refinements that we prove to be
sound with respect to a conventional type system. In
addition, we show how to handle negative examples,
which arise from user feedback or counterexample-guided
loops. This theory serves as the basis for a prototype
implementation that extends our core language to
support ML-style algebraic data types and structurally
inductive functions. Users can also specify synthesis
goals using polymorphic refinements and import
monomorphic libraries. The prototype serves as a
vehicle for empirically evaluating a number of
different strategies for resolving the nondeterminism
of the sequent calculus---bottom-up theorem-proving,
term enumeration with refinement type checking, and
combinations of both---the results of which classify,
explain, and validate the design choices of existing
synthesis systems. It also provides a platform for
measuring the practical value of a specification
language that combines ``examples'' with the more
general expressiveness of refinements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '16 conference proceedings.",
}
@Article{Homer:2016:ALG,
author = "Michael Homer and Timothy Jones and James Noble",
title = "From {APIs} to languages: generalising method names",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "1--12",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816708",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Method names with multiple separate parts are a
feature of many dynamic languages derived from
Smalltalk. Generalising the syntax of method names to
allow parts to be repeated, optional, or alternatives,
means a single definition can respond to a whole family
of method requests. We show how generalising method
names can support flexible APIs for domain-specific
languages, complex initialisation tasks, and control
structures defined in libraries. We describe how we
have extended Grace to support generalised method
names, and prove that such an extension can be
integrated into a gradually-typed language while
preserving type soundness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Maidl:2016:FTL,
author = "Andr{\'e} Murbach Maidl and Fabio Mascarenhas and
Roberto Ierusalimschy",
title = "A formalization of {Typed Lua}",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "13--25",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816709",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmers often migrate from a dynamically typed to
a statically typed language when their simple scripts
evolve into complex programs. Optional type systems are
one way of having both static and dynamic typing in the
same language, while keeping its dynamically typed
semantics. This makes evolving a program from dynamic
to static typing a matter of describing the implied
types that it is using and adding annotations to make
those types explicit. Designing an optional type system
for an existing dynamically typed language is
challenging, as its types should feel natural to
programmers that are already familiar with this
language. In this work, we give a formal description of
Typed Lua, an optional type system for Lua, with a
focus on two of its novel type system features:
incremental evolution of imperative record and object
types that is both lightweight and type-safe, and
projection types, a combination of flow typing,
functions that return multiple values, and multiple
assignment. While our type system is tailored to the
features and idioms of Lua, its features can be adapted
to other imperative scripting languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Tanter:2016:GCP,
author = "{\'E}ric Tanter and Nicolas Tabareau",
title = "Gradual certified programming in {Coq}",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "26--40",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816710",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Expressive static typing disciplines are a powerful
way to achieve high-quality software. However, the
adoption cost of such techniques should not be
under-estimated. Just like gradual typing allows for a
smooth transition from dynamically-typed to
statically-typed programs, it seems desirable to
support a gradual path to certified programming. We
explore gradual certified programming in Coq, providing
the possibility to postpone the proofs of selected
properties, and to check ``at runtime'' whether the
properties actually hold. Casts can be integrated with
the implicit coercion mechanism of Coq to support
implicit cast insertion {\`a} la gradual typing.
Additionally, when extracting Coq functions to
mainstream languages, our encoding of casts supports
lifting assumed properties into runtime checks. Much to
our surprise, it is not necessary to extend Coq in any
way to support gradual certified programming. A simple
mix of type classes and axioms makes it possible to
bring gradual certified programming to Coq in a
straightforward manner.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Ernst:2016:MSD,
author = "Erik Ernst and Anders M{\o}ller and Mathias Schwarz
and Fabio Strocco",
title = "Message safety in {Dart}",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "41--53",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816711",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Unlike traditional static type checking, the type
system in the Dart programming language is unsound by
design, even for fully annotated programs. The
rationale has been that this allows compile-time
detection of likely errors and enables code completion
in integrated development environments, without being
restrictive on programmers. Despite unsoundness,
judicious use of type annotations can ensure useful
properties of the runtime behavior of Dart programs. We
present a formal model of a core of Dart with a focus
on its type system, which allows us to elucidate the
causes of unsoundness. Our main contribution is a
characterization of message-safe programs and a theorem
stating that such programs will never encounter
`message not understood' errors at runtime. Message
safety is less restrictive than traditional type
soundness, and we argue that it forms a natural
intermediate point between dynamically typed and
statically typed Dart programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Lyde:2016:CFA,
author = "Steven Lyde and William E. Byrd and Matthew Might",
title = "Control-flow analysis of dynamic languages via pointer
analysis",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "54--62",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816712",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We demonstrate how to map a control-flow analysis for
a higher-order language (dynamic languages are
typically higher-order) into a pointer analysis for a
first-order language, such as C. This allows us to use
existing pointer analysis tools to perform a
control-flow analysis, exploiting their technical
advancements and the engineering effort that went into
developing them. We compare the results of two recent
parallel pointer analysis tools with a parallel
control-flow analysis tool. While it has been known
that a control-flow analysis of higher-order languages
and a pointer analysis of first-order languages are
very similar, we demonstrate that these two analyses
are actually more similar than previously thought. We
present the first mapping between a higher-order
control-flow analysis and a pointer analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
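
The mapping is natural because resolving a higher-order call is
itself a points-to question: the targets of a call through v are
exactly the functions v may point to. A toy Andersen-style fixed
point in Python (an illustrative encoding, not the paper's actual
mapping or the parallel tools it evaluates):

    from collections import defaultdict

    pts = defaultdict(set)
    facts = [("addr", "f", "lambda1"),   # f = lambda1
             ("addr", "g", "lambda2"),   # g = lambda2
             ("copy", "h", "f"),         # h = f   (one dataflow path)
             ("copy", "h", "g")]         # h = g   (another)

    changed = True
    while changed:                       # naive subset-constraint fixed point
        changed = False
        for kind, dst, src in facts:
            new = {src} if kind == "addr" else pts[src]
            if not new <= pts[dst]:
                pts[dst] |= new
                changed = True

    print(sorted(pts["h"]))   # ['lambda1', 'lambda2']: the call targets of h()
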
@Article{Feeley:2016:CML,
author = "Marc Feeley",
title = "Compiling for multi-language task migration",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "63--77",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816713",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Task migration allows a running program to continue
its execution in a different destination environment.
Increasingly, execution environments are defined by
combinations of cultural and technological constraints,
affecting the choice of host language, libraries and
tools. A compiler supporting multiple target
environments and task migration must be able to marshal
continuations and then unmarshal and continue their
execution, ideally, even if the language of the
destination environment is different. In this paper, we
propose a compilation approach based on a virtual
machine that strikes a balance between implementation
portability and efficiency. We explain its
implementation within a Scheme compiler targeting
JavaScript, PHP, Python, Ruby and Java --- some of the
most popular host languages for web applications. As
our experiments show, this approach compares well with
other Scheme compilers targeting high-level languages
in terms of execution speed, being sometimes up to 3
orders of magnitude faster.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Grimmer:2016:HPC,
author = "Matthias Grimmer and Chris Seaton and Roland Schatz
and Thomas W{\"u}rthinger and Hanspeter
M{\"o}ssenb{\"o}ck",
title = "High-performance cross-language interoperability in a
multi-language runtime",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "78--90",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816714",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmers combine different programming languages
because it allows them to use the most suitable
language for a given problem, to gradually migrate
existing projects from one language to another, or to
reuse existing source code. However, existing
cross-language mechanisms suffer from complex
interfaces, insufficient flexibility, or poor
performance. We present the TruffleVM, a multi-language
runtime that allows composing different language
implementations in a seamless way. It reduces the
amount of required boiler-plate code to a minimum by
allowing programmers to access foreign functions or
objects by using the notation of the host language. We
compose language implementations that translate source
code to an intermediate representation (IR), which is
executed on top of a shared runtime system. Language
implementations use language-independent messages that
the runtime resolves at their first execution by
transforming them to efficient
foreign-language-specific operations. The TruffleVM
avoids conversion or marshaling of foreign objects at
the language boundary and allows the dynamic compiler
to perform its optimizations across language
boundaries, which guarantees high performance. This
paper presents an implementation of our ideas based on
the Truffle system and its guest language
implementations JavaScript, Ruby, and C.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Leopoldseder:2016:JJT,
author = "David Leopoldseder and Lukas Stadler and Christian
Wimmer and Hanspeter M{\"o}ssenb{\"o}ck",
title = "{Java-to-JavaScript} translation via structured
control flow reconstruction of compiler {IR}",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "91--103",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816715",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an approach to cross-compile Java bytecodes
to JavaScript, building on existing Java optimizing
compiler technology. Static analysis determines which
Java classes and methods are reachable. These are then
translated to JavaScript using a re-configured Java
just-in-time compiler with a new back end that
generates JavaScript instead of machine code. Standard
compiler optimizations such as method inlining and
global value numbering, as well as advanced
optimizations such as escape analysis, lead to compact
and optimized JavaScript code. Compiler IR is
unstructured, so structured control flow needs to be
reconstructed before code generation is possible. We
present details of our control flow reconstruction
algorithm. Our system is based on Graal, an open-source
optimizing compiler for the Java HotSpot VM and other
VMs. The modular and VM-independent architecture of
Graal allows us to reuse the intermediate
representation, the bytecode parser, and the high-level
optimizations. Our custom back end first performs
control flow reconstruction and then JavaScript code
generation. The generated JavaScript undergoes a set of
optimizations to increase readability and performance.
Static analysis is performed on the Graal intermediate
representation as well. Benchmark results show that
medium-sized Java benchmarks such as SPECjbb2005 run
with acceptable performance on the V8 JavaScript VM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Pape:2016:LIS,
author = "Tobias Pape and Tim Felgentreff and Robert Hirschfeld
and Anton Gulenko and Carl Friedrich Bolz",
title = "Language-independent storage strategies for tracing
{JIT}-based virtual machines",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "104--113",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816716",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Storage strategies have been proposed as a run-time
optimization for the PyPy Python implementation and
have shown promising results for optimizing execution
speed and memory requirements. However, it remained
unclear whether the approach works equally well in
other dynamic languages. Furthermore, while PyPy is
based on RPython, a language to write VMs with reusable
components such as a tracing just-in-time compiler and
garbage collection, the design of storage strategies
itself was not generalized to be reusable across languages
implemented using that same toolchain. In this paper,
we present a general design and implementation for
storage strategies and show how they can be reused
across different RPython-based languages. We evaluate
the performance of our implementation for RSqueak, an
RPython-based VM for Squeak/Smalltalk and show that
storage strategies may indeed offer performance
benefits for certain workloads in other dynamic
programming languages. We furthermore evaluate the
generality of our implementation by applying it to
Topaz, a Ruby VM, and Pycket, a Racket
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
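
The mechanism behind storage strategies fits in a few lines. A toy
Python sketch (illustrative only; RPython strategies cover many more
representations and cooperate with the tracing JIT):

    from array import array

    class StrategyList:
        """Starts with a compact int-specialised store and transparently
        switches to a generic object store on the first non-int append."""

        def __init__(self):
            self._ints = array("q")      # unboxed 64-bit int storage
            self._objs = None            # generic storage, once needed

        def append(self, v):
            if self._objs is None:
                if isinstance(v, int):
                    self._ints.append(v)
                    return
                self._objs = list(self._ints)   # strategy switch: box ints
                self._ints = None
            self._objs.append(v)

        def __getitem__(self, i):
            return (self._ints if self._objs is None else self._objs)[i]

    xs = StrategyList()
    xs.append(1); xs.append(2)    # stays in the unboxed int strategy
    xs.append("three")            # forces the switch to generic storage
    print(xs[0], xs[2])           # -> 1 three
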
@Article{Aakerblom:2016:MPP,
author = "Beatrice {\AA}kerblom and Tobias Wrigstad",
title = "Measuring polymorphism in {Python} programs",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "114--128",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816717",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Following the increased popularity of dynamic
languages and their increased use in critical software,
there have been many proposals to retrofit static type
system to these languages to improve possibilities to
catch bugs and improve performance. A key question for
any type system is whether the types should be
structural, for more expressiveness, or nominal, to
carry more meaning for the programmer. For retrofitted
type systems, it seems the current trend is using
structural types. This paper attempts to answer the
question to what extent this extra expressiveness is
needed, and how the possible polymorphism in dynamic
code is used in practice. We study polymorphism in 36
real-world open source Python programs and approximate
to what extent nominal and structural types could be
used to type these programs. The study is based on
collecting traces from multiple runs of the programs
and analysing the polymorphic degrees of targets at
more than 7 million call-sites. Our results show that
while polymorphism is used in all programs, the
programs are to a great extent monomorphic. The
polymorphism found is evenly distributed across
libraries and program-specific code and occurs both
during program start-up and normal execution. Most
programs contain a few ``megamorphic'' call-sites where
receiver types vary widely. The non-monomorphic parts
of the programs can to some extent be typed with
nominal or structural types, but none of the approaches
can type entire programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
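%%% The study above classifies call sites by how many distinct receiver
%%% types they see across runs. A tiny trace-based Python sketch of that
%%% measurement (the site ids and the megamorphic cut-off are illustrative
%%% assumptions, not the paper's instrumentation):
%%%
%%% from collections import defaultdict
%%%
%%% receivers = defaultdict(set)     # call-site id -> observed type names
%%%
%%% def record(site, receiver):
%%%     receivers[site].add(type(receiver).__name__)
%%%
%%% def degree(site):
%%%     n = len(receivers[site])
%%%     if n == 1:
%%%         return "monomorphic"
%%%     if n <= 5:                   # assumed cut-off for illustration
%%%         return "polymorphic"
%%%     return "megamorphic"         # receiver types vary widely
%%%
%%% for x in [1, 2.0, "s", [], {}, set()]:
%%%     record("demo.py:42", x)      # one call site, many receiver types
%%% print(degree("demo.py:42"))      # -> megamorphic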
@Article{Alcocer:2016:TPV,
author = "Juan Pablo Sandoval Alcocer and Alexandre Bergel",
title = "Tracking down performance variation against source
code evolution",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "129--139",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816718",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Little is known about how software performance evolves
across software revisions. The severity of this
situation is high since (i) most performance variations
seem to happen accidentally and (ii) addressing a
performance regression is challenging, especially when
functional code is stacked on it. This paper reports an
empirical study on the performance evolution of 19
applications, totaling over 19 MLOC. It took 52 days to
run our 49 benchmarks. By relating performance
variation with source code revisions, we found
that: (i) 1 out of every 3 application revisions
introduces a performance variation, (ii) performance
variations may be classified into 9 patterns, (iii) the
most prominent cause of performance regression involves
loops and collections. We carefully describe the
patterns we identified, and detail how we addressed the
numerous challenges we faced to complete our
experiment.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Kedlaya:2016:SST,
author = "Madhukar N. Kedlaya and Behnam Robatmili and Ben
Hardekopf",
title = "Server-side type profiling for optimizing client-side
{JavaScript} engines",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "140--153",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816719",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern JavaScript engines optimize hot functions using
a JIT compiler along with type information gathered by
an online profiler. However, the profiler's information
can be unsound, and when unexpected types are
encountered the engine must recover using an expensive
mechanism called deoptimization. In this paper we
describe a method to significantly reduce the number of
deoptimizations observed by client-side JavaScript
engines by using ahead-of-time profiling on the
server-side. Unlike previous work on ahead-of-time
profiling for statically-typed languages such as Java,
our technique must operate on a dynamically-typed
language, which significantly changes the required
insights and methods to make the technique effective.
We implement our proposed technique using the
SpiderMonkey JavaScript engine, and we evaluate our
implementation using three different kinds of
benchmarks: the industry-standard Octane benchmark
suite, a set of JavaScript physics engines, and a set
of real-world websites from the Membench50 benchmark
suite. We show that using ahead-of-time profiling
provides significant performance benefits over the
baseline vanilla SpiderMonkey engine.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
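%%% The paper above ships ahead-of-time type profiles from the server to
%%% client JIT engines. A loose Python analogue of collecting and exporting
%%% such type feedback (the profile format and decorator are hypothetical;
%%% SpiderMonkey's real profile data is engine-internal):
%%%
%%% import json
%%%
%%% profile = {}                     # function name -> observed arg types
%%%
%%% def profiled(fn):
%%%     def wrapper(*args):
%%%         profile.setdefault(fn.__name__, set()).update(
%%%             type(a).__name__ for a in args)
%%%         return fn(*args)
%%%     return wrapper
%%%
%%% @profiled
%%% def add(a, b):
%%%     return a + b
%%%
%%% add(1, 2); add(3, 4)             # server-side training runs
%%% hints = {k: sorted(v) for k, v in profile.items()}
%%% print(json.dumps(hints))         # shipped to clients: {"add": ["int"]}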
@Article{Fischer:2016:EIE,
author = "Lars Fischer and Stefan Hanenberg",
title = "An empirical investigation of the effects of type
systems and code completion on {API} usability using
{TypeScript} and {JavaScript} in {MS Visual Studio}",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "154--167",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816720",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent empirical studies that compared static and
dynamic type systems on API usability showed a positive
impact of static type systems on developer productivity
in most cases. Nevertheless, it is unclear how large
this effect is in comparison to other factors. One
obvious factor in programming is tooling: It is
commonly accepted that modern IDEs have a large
positive impact on developers, although it is not clear
which parts of modern IDEs are responsible for that.
One possible---and for most developers obvious
candidate---is code completion. This paper describes a
$2\times2$ randomized trial that compares JavaScript and
Microsoft's statically typed alternative TypeScript
with and without code completion in MS Visual Studio.
While the experiment shows (in line with previous
experiments) a large positive effect of the
statically typed language TypeScript, the effect of
code completion is not only marginal but also only
borderline statistically significant. This seems to be
an indicator that the effect of static type systems is
larger than often assumed, at least in comparison to
code completion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
@Article{Teruel:2016:ACR,
author = "Camille Teruel and St{\'e}phane Ducasse and Damien
Cassou and Marcus Denker",
title = "Access control to reflection with object ownership",
journal = j-SIGPLAN,
volume = "51",
number = "2",
pages = "168--176",
month = feb,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936313.2816721",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reflection is a powerful programming language feature
that enables language extensions, generic code, dynamic
analyses, development tools, etc. However, uncontrolled
reflection breaks object encapsulation and considerably
increases the attack surface of programs; e.g.,
malicious libraries can use reflection to attack their
client applications. To bring reflection and object
encapsulation back together, we use dynamic object
ownership to design an access control policy to
reflective operations. This policy grants objects full
reflective power over the objects they own but limited
reflective power over other objects. Code is still able
to use advanced reflective operations but reflection
cannot be used as an attack vector anymore.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '15 conference proceedings.",
}
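%%% The policy above grants full reflective power only over owned objects.
%%% A toy Python rendering of ownership-gated reflection (the mirror API
%%% and owner field are illustrative assumptions; the paper's setting is
%%% Smalltalk-style reflection with dynamic ownership):
%%%
%%% class OwnedMirror:
%%%     """Reflective access gated by object ownership."""
%%%     def __init__(self, principal):
%%%         self.principal = principal
%%%     def read(self, obj, field):
%%%         if getattr(obj, "owner", None) is not self.principal:
%%%             raise PermissionError("not the owner: reflection denied")
%%%         return getattr(obj, field)   # full power over owned objects
%%%
%%% class Account:
%%%     def __init__(self, owner):
%%%         self.owner, self.balance = owner, 100
%%%
%%% app, lib = object(), object()
%%% acct = Account(owner=app)
%%% print(OwnedMirror(app).read(acct, "balance"))    # -> 100
%%% # OwnedMirror(lib).read(acct, "balance")         # PermissionError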
@Article{Narasimhan:2016:NGS,
author = "Priya Narasimhan and Utsav Drolia and Jiaqi Tan and
Nathan D. Mickulicz and Rajeev Gandhi",
title = "The next-generation in-stadium experience (keynote)",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "1--10",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814205",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "YinzCam is a cloud-hosted service that provides sports
fans with real-time scores, news, photos, statistics,
live radio, streaming video, etc., on their mobile
devices. YinzCam's infrastructure is currently hosted
on Amazon Web Services (AWS) and supports over 30
million installs of the official mobile apps of 140+
NHL/NFL/NBA/NRL/NCAA sports teams and venues. YinzCam's
workload is necessarily multi-modal (e.g., pre-game,
in-game, post-game, game-day, non-game-day), with normal
game-time traffic being twenty times that on non-game
days. This paper describes the evolution of YinzCam's
production architecture and distributed infrastructure,
from its beginnings in 2009, when it was used to
support thousands of concurrent users, to today's
system that supports millions of concurrent users on
any game day. We also discuss key new opportunities to
improve the fan experience inside the stadium of the
future, without impacting the available bandwidth, by
crowd-sourcing the thousands of mobile devices that are
in fans' hands inside these venues. We present Krowd, a
novel distributed key-value store for promoting
efficient content sharing, discovery and retrieval
across the mobile devices inside a stadium. We present
CHIPS, a system that ensures that users' privacy is
maintained while their devices participate in the
crowdsourced infrastructure.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Leissa:2016:SED,
author = "Roland Lei{\ss}a and Klaas Boesche and Sebastian Hack
and Richard Membarth and Philipp Slusallek",
title = "Shallow embedding of {DSLs} via online partial
evaluation",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "11--20",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814208",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper investigates shallow embedding of DSLs by
means of online partial evaluation. To this end, we
present a novel online partial evaluator for
continuation-passing style languages. We argue that it
has, in contrast to prior work, a predictable
termination policy that works well in practice. We
present our approach formally using a
continuation-passing variant of PCF and prove its
termination properties. We evaluate our technique
experimentally in the field of visual and
high-performance computing and show that our evaluator
produces highly specialized and efficient code for CPUs
as well as GPUs that matches the performance of
hand-tuned expert code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Scherr:2016:AFC,
author = "Maximilian Scherr and Shigeru Chiba",
title = "Almost first-class language embedding: taming staged
embedded {DSLs}",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "21--30",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814217",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Embedded domain-specific languages (EDSLs), inheriting
a general-purpose language's features as well as
look-and-feel, have traditionally been second-class or
rather non-citizens in terms of host-language design.
This makes sense when one regards them to be on the
same level as traditional, non-EDSL library interfaces.
However, this equivalence only applies to the simplest
of EDSLs. In this paper we illustrate why this is
detrimental when moving on to EDSLs that employ
staging, i.e. program reification, by example of
various issues that affect authors and users alike. We
believe that if EDSLs are to be considered a reliable,
language-like interface abstraction, they require
exceptional attention and design scrutiny. Instead of
unenforceable conventions, we advocate the acceptance
of EDSLs as proper, i.e. almost first-class, citizens
while retaining most advantages of pure embeddings. As
a small step towards this goal, we present a pragmatic
framework prototype for Java. It is based on
annotations that explicate and document membership to
explicit EDSL entities. In a nutshell, our framework
identifies (annotated) method calls and field accesses
as EDSL terms and dynamically constructs an
abstract-syntax representation, which is eventually
passed to a semantics-defining back end implemented by
the EDSL author.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Reynders:2016:GSB,
author = "Bob Reynders and Dominique Devriese and Frank
Piessens",
title = "Generating safe boundary {APIs} between typed {EDSLs}
and their environments",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "31--34",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814219",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Embedded domain specific languages (EDSLs) are used to
represent special-purpose code in a general-purpose
language and they are used for applications like vector
calculations and run-time code generation. Often, code
in an EDSL is compiled to a target (e.g. GPU languages,
JVM bytecode, assembly, JavaScript) and needs to
interface with other code that is available at that
level but uses other data representations or calling
conventions. We present an approach for safely making
available such APIs in a typed EDSL, guaranteeing
correct conversions between data representations and
the respect for calling conventions. When the code
being interfaced with is the result of static
compilation of host language code, we propose a way to
auto-generate the needed boilerplate using
meta-programming. We instantiate our technique with
JavaScript as the target language, JS-Scala as the
EDSL, Scala.js as the static compiler and Scala macros
to generate the boilerplate, but our design is more
generally applicable. We provide evidence of the usefulness
of our approach through a prototype implementation that
we have applied in a non-trivial code base.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Medeiros:2016:ESC,
author = "Fl{\'a}vio Medeiros and Iran Rodrigues and M{\'a}rcio
Ribeiro and Leopoldo Teixeira and Rohit Gheyi",
title = "An empirical study on configuration-related issues:
investigating undeclared and unused identifiers",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "35--44",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814206",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The variability of configurable systems may lead to
configuration-related issues (i.e., faults and
warnings) that appear only when we select certain
configuration options. Previous studies found that
issues related to configurability are harder to detect
than issues that appear in all configurations, because
variability increases the complexity. However, little
effort has been put into understanding
configuration-related faults (e.g., undeclared
functions and variables) and warnings (e.g., unused
functions and variables). To better understand the
peculiarities of configuration-related
undeclared/unused variables and functions, in this
paper we perform an empirical study of 15 systems to
answer research questions related to how developers
introduce these issues, the number of configuration
options involved, and the time that these issues remain
in source files. To make the analysis of several
projects feasible, we propose a strategy that minimizes
the initial setup problems of variability-aware tools.
We detect and confirm 2 undeclared variables, 14
undeclared functions, 16 unused variables, and 7 unused
functions related to configurability. We submit 30
patches to fix issues not fixed by developers. Our
findings support the effectiveness of sampling (i.e.,
analysis of only a subset of valid configurations)
because most issues involve two or fewer configuration
options. Nevertheless, by analyzing the version history
of the projects, we observe that a number of issues
remain in the code for several years. Furthermore, the
corpus of undeclared/unused variables and functions
gathered is a valuable source to study these issues,
compare sampling algorithms, and test and improve
variability-aware tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{El-Sharkawy:2016:AKS,
author = "Sascha El-Sharkawy and Adam Krafczyk and Klaus
Schmid",
title = "Analysing the {Kconfig} semantics and its analysis
tools",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "45--54",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814222",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Linux kernel is often used as a real world case
study to demonstrate novel Software Product Line
Engineering research methods. An important point in
this is often the analysis of the Kconfig semantics.
However, we detected that the semantics of Kconfig is
rather unclear and has many special cases, which are
not documented in its short specification. We performed
a systematic analysis to uncover the correct behaviour
of Kconfig and present the results, which are necessary
for applying semantically correct analyses. Further, we
analyse whether existing analysis tools of the research
community are aware of the correct
semantics of Kconfig. These analyses can be used for
improving existing analysis tools as well as decision
support for selecting an appropriate tool for a
specific analysis. In summary, we contribute to a better
understanding of Kconfig in the research community to
improve the validity of evaluations based on Linux.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Alshara:2016:MLO,
author = "Zakarea Alshara and Abdelhak-Djamel Seriai and Chouki
Tibermacine and Hinde Lilia Bouziane and Christophe
Dony and Anas Shatnawi",
title = "Migrating large object-oriented applications into
component-based ones: instantiation and inheritance
transformation",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "55--64",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814223",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large object-oriented applications have complex and
numerous dependencies, and usually do not have explicit
software architectures. Therefore they are hard to
maintain, and parts of them are difficult to reuse.
Component-based development paradigm emerged for
improving these aspects and for supporting effective
maintainability and reuse. It provides better
understandability through a high-level architecture
view of the application. Thereby migrating
object-oriented applications to component-based ones
will contribute to improving these characteristics
(maintainability and reuse). In this paper, we propose
an approach to automatically transform object-oriented
applications to component-based ones. More
particularly, the input of the approach is the result
provided by software architecture recovery: a
component-based architecture description. Then, our
approach transforms the object-oriented source code in
order to produce deployable components. We focus in
this paper on the transformation of source code related
to instantiation and inheritance dependencies between
classes that are in different components. We
evaluated the proposed solution by transforming a
collection of Java applications
into the OSGi framework. The experimental results are
discussed in this paper.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Lopez:2016:SSP,
author = "Michael Lopez and C. Jasson Casey and Gabriel {Dos
Reis} and Colton Chojnacki",
title = "Safer {SDN} programming through {Arbiter}",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "65--74",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814218",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software Defined Networking (SDN) programs are written
with respect to assumptions on software and hardware
facilities and protocol definitions. Silent mismatches
between the expected feature set and implemented
feature set of SDN artifacts can easily lead to
hard-to-debug network configurations, decreased network
performance, outages, or worse, security
vulnerabilities. We show how the paradigm of axiomatic
programming, supported by practical dependent types,
provides effective support for SDN executable
specifications and verification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Kolesnichenko:2016:CBG,
author = "Alexey Kolesnichenko and Christopher M. Poskitt and
Sebastian Nanz and Bertrand Meyer",
title = "Contract-based general-purpose {GPU} programming",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "75--84",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814216",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Using GPUs as general-purpose processors has
revolutionized parallel computing by offering, for a
large and growing set of algorithms, massive
data-parallelization on desktop machines. An obstacle
to widespread adoption, however, is the difficulty of
programming them and the low-level control of the
hardware required to achieve good performance. This
paper suggests a programming library, SafeGPU, that
aims at striking a balance between programmer
productivity and performance, by making GPU
data-parallel operations accessible from within a
classical object-oriented programming language. The
solution is integrated with the design-by-contract
approach, which increases confidence in functional
program correctness by embedding executable program
specifications into the program text. We show that our
library leads to modular and maintainable code that is
accessible to GPGPU non-experts, while providing
performance that is comparable with hand-written CUDA
code. Furthermore, runtime contract checking turns out
to be feasible, as the contracts can be executed on the
GPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
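%%% The library above attaches executable contracts to GPU data-parallel
%%% operations. A CPU-only Python sketch of the shape of such an API (names
%%% are hypothetical; SafeGPU itself is an Eiffel library over CUDA):
%%%
%%% def dp_map(xs, fn, require=None, ensure=None):
%%%     """Data-parallel map with executable pre/postconditions."""
%%%     if require is not None:
%%%         assert all(require(x) for x in xs), "precondition violated"
%%%     ys = [fn(x) for x in xs]         # stand-in for a GPU kernel launch
%%%     if ensure is not None:
%%%         assert all(ensure(y) for y in ys), "postcondition violated"
%%%     return ys
%%%
%%% print(dp_map([1.0, 4.0, 9.0], lambda x: x ** 0.5,
%%%              require=lambda x: x >= 0.0,
%%%              ensure=lambda y: y >= 0.0))          # -> [1.0, 2.0, 3.0]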
@Article{Yamaguchi:2016:IMS,
author = "Hiroshi Yamaguchi and Shigeru Chiba",
title = "Inverse macro in {Scala}",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "85--94",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814213",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a new variant of typed syntactic macro
systems named inverse macro, which improves the
expressiveness of macro systems. The inverse macro
system enables to implement operators with complex
side-effects, such as lazy operators and delimited
continuation operators, which are beyond the power of
existing macro systems. We have implemented the inverse
macro system as an extension to Scala 2.11. We also
show the expressiveness of the inverse macro system by
comparing two versions of shift/reset, bundled in Scala
2.11 and implemented with the inverse macro system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Adam:2016:TTS,
author = "Sorin Adam and Ulrik Pagh Schultz",
title = "Towards tool support for spreadsheet-based
domain-specific languages",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "95--98",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814215",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Spreadsheets are commonly used by non-programmers to
store data in a structured form, this data can in some
cases be considered to be a program in a
domain-specific language (DSL). Unlike ordinary
text-based domain-specific languages, there is however
currently no formalism for expressing the syntax of
such spreadsheet-based DSLs (SDSLs), and there is no
tool support for automatically generating language
infrastructure such as parsers and IDE support. In this
paper we define a simple notion of two-dimensional
grammars for SDSLs, and show how such grammars can be
used for automatically generating parsers that extract
structured data from a spreadsheet in the form of an
AST. We demonstrate automatic generation of parsers for
a number of examples, including the questionnaire DSL
from LWC2014 and a DSL for writing safety
specifications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
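%%% The paper above defines two-dimensional grammars that say how regions
%%% of a sheet relate, e.g. a header row above aligned data rows. A minimal
%%% Python extractor in that spirit (the grammar here is a hard-coded
%%% assumption, not the paper's formalism):
%%%
%%% # grammar: row 0 is a header of field names; each later row is a
%%% # record whose cells line up under those names.
%%% def parse_sheet(rows):
%%%     header, *records = rows
%%%     return [dict(zip(header, rec)) for rec in records]   # a crude AST
%%%
%%% sheet = [["question", "type"],
%%%          ["Do you smoke?", "boolean"],
%%%          ["Age?", "integer"]]
%%% print(parse_sheet(sheet))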
@Article{Byalik:2016:NNA,
author = "Antuan Byalik and Sanchit Chadha and Eli Tilevich",
title = "Native-$2$-native: automated cross-platform code
synthesis from web-based programming resources",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "99--108",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814210",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For maximal market penetration, popular mobile
applications are typically supported on all major
platforms, including Android and iOS. Despite the vast
differences in the look-and-feel of major mobile
platforms, applications running on these platforms in
essence provide the same core functionality. As an
application is maintained and evolved, the resulting
changes must be replicated on all the supported
platforms, a tedious and error-prone programming
process. Existing automated source-to-source
translation tools prove inadequate due to the
structural and idiomatic differences in how
functionalities are expressed across major platforms.
In this paper, we present a new
approach---Native-2-Native---that automatically
synthesizes code for a mobile application to make use
of native resources on one platform, based on the
equivalent program transformations performed on another
platform. First, the programmer modifies a mobile
application's Android version to make use of some
native resource, with a plugin capturing code changes.
Based on the changes, the system then parameterizes a
web search query over popular programming resources
(e.g., Google Code, StackOverflow, etc.), to discover
equivalent iOS code blocks with the closest similarity
to the programmer-written Android code. The discovered
iOS code block is then presented to the programmer as
an automatically synthesized Swift source file to
further fine-tune and subsequently integrate in the
mobile application's iOS version. Our evaluation,
enhancing mobile applications to make use of common
native resources, shows that the presented approach can
correctly synthesize more than 86\% of Swift code for
the subject applications' iOS versions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Makarov:2016:CMS,
author = "Dmitri Makarov and Matthias Hauswirth",
title = "{CLOP}: a multi-stage compiler to seamlessly embed
heterogeneous code",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "109--112",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814211",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Heterogeneous programming complicates software
development. We present CLOP, a platform that embeds
code targeting heterogeneous compute devices in a
convenient and clean way, allowing unobstructed data
flow between the host code and the devices, reducing
the amount of source code by an order of magnitude. The
CLOP compiler uses the standard facilities of the D
programming language to generate code strictly at
compile-time. In this paper we describe the CLOP
language and the CLOP compiler implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Ringert:2016:CCG,
author = "Jan Oliver Ringert and Bernhard Rumpe and Andreas
Wortmann",
title = "Composing code generators for {C\&C} {ADLs} with
application-specific behavior languages (tool
demonstration)",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "113--116",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814224",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modeling software systems as component {\&} connector
architectures with application-specific behavior
modeling languages enables domain experts to describe
each component behavior with the most appropriate
language. Generating executable systems for such
language aggregates requires composing appropriate code
generators for the participating languages. Previous
work on code generator composition either focuses on
white-box integration based on code generator internals
or requires extensive handcrafting of integration code.
We demonstrate an approach to black-box generator
composition for architecture description languages that
relies on explicit interfaces and exploits the
encapsulation of components. This approach is
implemented for the architecture modeling framework
MontiArcAutomaton and has been evaluated in various
contexts. Ultimately, black-box code generator
composition facilitates development of code generators
for architecture description languages with embedded
behavior languages and increases code generator
reuse.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Kabac:2016:OMS,
author = "Milan Kab{\'a}c and Charles Consel",
title = "Orchestrating masses of sensors: a design-driven
development approach",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "117--120",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814226",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper proposes a design-driven development
approach that is dedicated to the domain of
orchestration of masses of sensors. The developer
declares what an application does using a
domain-specific language (DSL). Our compiler processes
domain-specific declarations to generate a customized
programming framework that guides and supports the
programming phase.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Foust:2016:GRP,
author = "Gabriel Foust and Jaakko J{\"a}rvi and Sean Parent",
title = "Generating reactive programs for graphical user
interfaces from multi-way dataflow constraint systems",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "121--130",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814207",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For a GUI to remain responsive, it must be able to
schedule lengthy tasks to be executed asynchronously.
In the traditional approach to GUI
implementation--writing functions to handle individual
user events--asynchronous programming easily leads to
defects. Ensuring that all data dependencies are
respected is difficult when new events arrive while
prior events are still being handled. Reactive
programming techniques, gaining popularity in GUI
programming, help since they make data dependencies
explicit and enforce them automatically as variables'
values change. However, a GUI's data dependencies
usually change along with its state. Reactive
programming must therefore describe a GUI as a
collection of many reactive programs, whose interaction
the programmer must explicitly coordinate. This paper
presents a declarative approach for GUI programming
that relieves the programmer from coordinating
asynchronous computations. The approach is based on our
prior work on ``property models'', where GUI state is
maintained by a dataflow constraint system. A property
model responds to user events by atomically
constructing new data dependencies and scheduling
asynchronous computations to enforce those
dependencies. In essence, a property model dynamically
generates a reactive program, adding to it as new
events occur. The approach gives the following
guarantee: the same sequence of events produces the
same results, regardless of the timing of those
events.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
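%%% A multi-way dataflow constraint offers several methods, each able to
%%% satisfy the constraint by writing a different variable; the solver
%%% picks one based on which variables were set most recently. A tiny
%%% Python illustration (a far simpler, hypothetical API than the paper's
%%% property models):
%%%
%%% class AspectConstraint:
%%%     """Keeps width == height * aspect by rewriting the stalest var."""
%%%     def __init__(self, width, height, aspect):
%%%         self.v = {"width": width, "height": height, "aspect": aspect}
%%%         self.order = ["width", "height", "aspect"]   # stale -> fresh
%%%     def set(self, name, value):
%%%         self.v[name] = value
%%%         self.order.remove(name)
%%%         self.order.append(name)          # mark as freshest
%%%         out = self.order[0]              # overwrite the stalest var
%%%         w, h, a = (self.v[k] for k in ("width", "height", "aspect"))
%%%         if out == "width":
%%%             self.v["width"] = h * a
%%%         elif out == "height":
%%%             self.v["height"] = w / a
%%%         else:
%%%             self.v["aspect"] = w / h
%%%
%%% c = AspectConstraint(width=160.0, height=90.0, aspect=16 / 9)
%%% c.set("height", 45.0)       # the solver recomputes width, not aspect
%%% print(c.v["width"])         # -> 80.0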
@Article{Florence:2016:PPP,
author = "Spencer P. Florence and Bruke Fetscher and Matthew
Flatt and William H. Temps and Tina Kiguradze and
Dennis P. West and Charlotte Niznik and Paul R. Yarnold
and Robert Bruce Findler and Steven M. Belknap",
title = "{POP-PL}: a patient-oriented prescription programming
language",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "131--140",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814221",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Medical professionals have long used algorithmic
thinking to describe and implement health care
processes without the benefit of the conceptual
framework provided by a programming language. Instead,
medical algorithms are expressed using English,
flowcharts, or data tables. This results in
prescriptions that are difficult to understand, hard to
debug, and awkward to reuse. This paper reports on the
design and evaluation of a domain-specific programming
language, POP-PL, for expressing medical algorithms. The
design draws on the experience of researchers in two
disciplines, programming languages and medicine. The
language is based around the idea that programs and
humans have complementary strengths, that when combined
can make for safer, more accurate performance of
prescriptions. We implemented a prototype of our
language and evaluated its design by writing
prescriptions in the new language and administering a
usability survey to medical professionals. This
formative evaluation suggests that medical
prescriptions can be conveyed by a programming
language's mode of expression and provides useful
information for refining the language. Analysis of the
survey results suggests that medical professionals can
understand and correctly modify programs in POP-PL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Selgrad:2016:LGV,
author = "Kai Selgrad and Alexander Lier and Franz K{\"o}ferl
and Marc Stamminger and Daniel Lohmann",
title = "Lightweight, generative variant exploration for
high-performance graphics applications",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "141--150",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814220",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Rendering performance is an everlasting goal of
computer graphics and a significant driver of advances
in both hardware architecture and algorithms. As a result,
it has become possible to apply advanced computer
graphics technology even in low-cost embedded
appliances, such as car instruments. Yet, to come up
with an efficient implementation, developers have to
put enormous efforts into hardware/problem-specific
tailoring, fine-tuning, and domain exploration, which
requires profound expert knowledge. If a good solution
has been found, there is a high probability that it
does not work as well with other architectures or even
the next hardware generation. Generative DSL-based
approaches could mitigate these efforts and provide for
an efficient exploration of algorithmic variants and
hardware-specific tuning ideas. However, in vertically
organized industries, such as automotive, suppliers are
reluctant to introduce these techniques as they fear
loss of control, high introduction costs, and
additional constraints imposed by the OEM with respect
to software and tool-chain certification. Moreover,
suppliers do not want to share their generic solutions
with the OEM, but only concrete instances. To this end,
we propose a light-weight and incremental approach for
meta programming of graphics applications. Our approach
relies on an existing formulation of C-like languages
that is amenable to meta programming, which we extend
to become a lightweight language to combine algorithmic
features. Our method provides a concise notation for
meta programs and generates easily sharable output in
the appropriate C-style target language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Seidl:2016:GSP,
author = "Christoph Seidl and Sven Schuster and Ina Schaefer",
title = "Generative software product line development using
variability-aware design patterns",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "151--160",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814212",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software Product Lines (SPLs) are an approach to reuse
in-the-large that models a set of closely related
software systems in terms of commonalities and
variabilities. Design patterns are best practices for
addressing recurring design problems in object-oriented
source code. In the practice of implementing an SPL,
instances of certain design patterns are employed to
handle variability, which makes these
``variability-aware design patterns'' a best practice
for SPL design. However, there currently is no
dedicated method for proactively developing SPLs using
design patterns suitable for realizing variable
functionality. In this paper, we present a method to
perform generative SPL development with design
patterns. We use role models to capture design patterns
and their relation to a variability model. We further
allow mapping of individual design pattern roles to
elements of realization artifacts to be generated
(e.g., classes, methods) and check the conformance of
the realization with the specification of the pattern.
With this method, we support proactive development of
SPLs using design patterns to apply best practices for
the realization of variability. We present an
implementation of our approach within the Eclipse IDE
and demonstrate it within a case study.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Font:2016:AMR,
author = "Jaime Font and Lorena Arcega and {\O}ystein Haugen and
Carlos Cetina",
title = "Addressing metamodel revisions in model-based software
product lines",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "161--170",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814214",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Metamodels evolve over time, which can break the
conformance between the models and the metamodel. Model
migration strategies aim to co-evolve models and
metamodels together, but their application is not fully
automatable and is thus cumbersome and error-prone.
We introduce the Variable MetaModel (VMM) strategy to
address the evolution of the reusable model assets of a
model-based Software Product Line. The VMM strategy
applies variability modeling ideas to express the
evolution of the metamodel in terms of commonalities
and variabilities. When the metamodel evolves, the
models continue to conform to the VMM, avoiding the
need for migration. We have applied both the
traditional migration strategy and the VMM strategy to
a retrospective case study that includes 13 years of
evolution of our industrial partner, an induction-hob
manufacturer. The comparison between the two strategies
shows better results for the VMM strategy in terms of
model indirection, automation, and trust leak.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Inostroza:2016:MIM,
author = "Pablo Inostroza and Tijs van der Storm",
title = "Modular interpreters for the masses: implicit context
propagation using object algebras",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "171--180",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814209",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modular interpreters have the potential to achieve
component-based language development: instead of
writing language interpreters from scratch, they can be
assembled from reusable, semantic building blocks.
Unfortunately, traditional language interpreters are
hard to extend because different language constructs
may require different interpreter signatures. For
instance, arithmetic interpreters produce a value
without any context information, whereas binding
constructs require an additional environment. In this
paper, we present a practical solution to this problem
based on implicit context propagation. By structuring
denotational-style interpreters as Object Algebras,
base interpreters can be retroactively lifted into new
interpreters that have an extended signature. The
additional parameters are implicitly propagated behind
the scenes, through the evaluation of the base
interpreter. Interpreter lifting enables a flexible
style of component-based language development. The
technique works in mainstream object-oriented
languages, does not sacrifice type safety or separate
compilation, and can be easily automated. We illustrate
implicit context propagation using a modular definition
of Featherweight Java and its extension to support
side-effects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
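%%% The paper above lifts context-free interpreters so their denotations
%%% accept (and propagate) an extra environment argument, letting them
%%% compose with binding constructs. A compact Python rendering of the idea
%%% (the paper uses Object Algebras in Java; these names are illustrative):
%%%
%%% class EvalArith:
%%%     """Base interpreter: denotations are zero-argument thunks."""
%%%     def lit(self, n):    return lambda: n
%%%     def add(self, l, r): return lambda: l() + r()
%%%
%%% class LiftedArith:
%%%     """EvalArith lifted: denotations now take an env, which is
%%%        propagated to children and otherwise ignored."""
%%%     def __init__(self, base): self.base = base
%%%     def lit(self, n):
%%%         return lambda env: self.base.lit(n)()
%%%     def add(self, l, r):
%%%         return lambda env: self.base.add(lambda: l(env),
%%%                                          lambda: r(env))()
%%%
%%% class EvalVars:
%%%     """Extension that genuinely needs the environment."""
%%%     def var(self, name):
%%%         return lambda env: env[name]
%%%     def let(self, name, e, body):
%%%         return lambda env: body({**env, name: e(env)})
%%%
%%% class Lang(LiftedArith, EvalVars):
%%%     def __init__(self): super().__init__(EvalArith())
%%%
%%% a = Lang()
%%% prog = a.let("x", a.lit(2), a.add(a.var("x"), a.lit(3)))
%%% print(prog({}))   # -> 5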
@Article{Noguera:2016:MQQ,
author = "Carlos Noguera and Viviane Jonckers",
title = "Model querying with query models",
journal = j-SIGPLAN,
volume = "51",
number = "3",
pages = "181--184",
month = mar,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2936314.2814225",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:58 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Model querying is an integral part of Model-Driven
Engineering. Developers query models when specifying
model transformations, when defining model constraints,
or simply when they need to extract some information
from the model. Model queries are often specified in a
general-purpose programming language, with developers
just navigating models through their programming
interfaces. OCL is the best known model query language,
and while successful, it makes it difficult to express
complex structural properties featured in target model
elements. In this paper we describe a model query
facility that aims at easing the description of
structural features in a query. In our approach,
developers model their queries by reusing fragments of
the target model to specify the invariant parts of the
template, augmented with variables and special
relations to specify what can vary. The query itself
conforms to a meta-model that extends the meta-model
under query. By reusing the queried meta-model
developers can reduce the mental overhead that comes
from using a different language to specify the queries.
We have developed a proof of concept tool for the
Eclipse Modeling Framework (EMF) that (1) generates a
query meta-model from a target meta-model, (2) allows
the construction of queries using a graphical,
graph-based editor and (3) executes the queries by
translating them to a set of logic predicates that are
then evaluated using an extension of the logic-based
query language Ekeko.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '15 conference proceedings.",
}
@Article{Zhou:2016:PUH,
author = "Yuanyuan Zhou",
title = "Programming Uncertain {$<$T$>$ hings}",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "1--2",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872416",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Innovation flourishes with good abstractions. For
instance, codification of the IEEE Floating Point
standard in 1985 was critical to the subsequent success
of scientific computing. Programming languages
currently lack appropriate abstractions for uncertain
data. Applications already use estimates from sensors,
machine learning, big data, humans, and approximate
algorithms, but most programming languages do not help
developers address correctness, programmability, and
optimization problems due to estimates. To address
these problems, we propose a new programming
abstraction called Uncertain$<$T$>$. We encourage the
community to develop and use abstractions for estimates.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
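%%% The abstract above argues for language abstractions over estimates.
%%% One published formulation, Uncertain$<$T$>$, represents a value as a
%%% distribution and makes queries return evidence instead of booleans; a
%%% minimal Python analogue (this sampling API is an assumption for
%%% illustration):
%%%
%%% import random
%%%
%%% class Uncertain:
%%%     """An estimate, represented by a sampling function."""
%%%     def __init__(self, sample):
%%%         self.sample = sample
%%%     def __add__(self, other):
%%%         return Uncertain(lambda: self.sample() + other.sample())
%%%     def prob_gt(self, threshold, n=10_000):
%%%         # evidence that the true value exceeds the threshold
%%%         return sum(self.sample() > threshold for _ in range(n)) / n
%%%
%%% # a GPS speed estimate: 8 m/s with Gaussian error, sigma = 2
%%% speed = Uncertain(lambda: random.gauss(8.0, 2.0))
%%% # ask about evidence rather than comparing the noisy point estimate:
%%% if speed.prob_gt(10.0) > 0.95:
%%%     print("issue speeding alert")    # rarely fires: p is about 0.16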
@Article{Abadal:2016:WAF,
author = "Sergi Abadal and Albert Cabellos-Aparicio and Eduard
Alarcon and Josep Torrellas",
title = "{WiSync}: an Architecture for Fast Synchronization
through On-Chip Wireless Communication",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "3--17",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872396",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In shared-memory multiprocessing, fine-grain
synchronization is challenging because it requires
frequent communication. As technology scaling delivers
larger manycore chips, such a pattern is expected to
remain costly to support. In this paper, we propose to
address this challenge by using on-chip wireless
communication. Each core has a transceiver and an
antenna to communicate with all the other cores. This
environment supports very low latency global
communication. Our architecture, called WiSync, uses a
per-core Broadcast Memory (BM). When a core writes to
its BM, all the other 100+ BMs get updated in less than
10 processor cycles. We also use a second wireless
channel with cheaper transfers to execute barriers
efficiently. WiSync supports multiprogramming, virtual
memory, and context switching. Our evaluation with
simulations of 128-threaded kernels and 64-threaded
applications shows that WiSync speeds up
synchronization substantially. Compared to using
advanced conventional synchronization, WiSync attains
an average speedup of nearly one order of magnitude for
the kernels, and of 1.12$\times$ for PARSEC and SPLASH-2.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Wang:2016:RTE,
author = "Xiaodong Wang and Jos{\'e} F. Mart{\'\i}nez",
title = "{ReBudget}: Trading Off Efficiency vs. Fairness in
Market-Based Multicore Resource Allocation via Runtime
Budget Reassignment",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "19--32",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872382",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Efficiently allocating shared resources in computer
systems is critical to optimizing execution. Recently,
a number of market-based solutions have been proposed
to attack this problem. Some of them provide provable
theoretical bounds to efficiency and/or fairness losses
under market equilibrium. However, they are limited to
markets with potentially important constraints, such as
enforcing equal budget for all players, or
curve-fitting players' utility into a specific function
type. Moreover, they do not generally provide an
intuitive ``knob'' to control efficiency vs. fairness.
In this paper, we introduce two new metrics, Market
Utility Range (MUR) and Market Budget Range (MBR),
through which we provide for the first time theoretical
bounds on efficiency and fairness of market equilibria
under arbitrary budget assignments. We leverage this
result and propose ReBudget, an iterative budget
re-assignment algorithm that can be used to control
efficiency vs. fairness at run-time. We apply our
algorithm to a multi-resource allocation problem in
multicore chips. Our evaluation using detailed
execution-driven simulations shows that our budget
re-assignment technique is intuitive, effective, and
efficient.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Zhu:2016:DEQ,
author = "Haishan Zhu and Mattan Erez",
title = "Dirigent: Enforcing {QoS} for Latency-Critical Tasks
on Shared Multicore Systems",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "33--47",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872394",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Latency-critical applications suffer from both average
performance degradation and reduced completion time
predictability when collocated with batch tasks. Such
variation forces the system to overprovision resources
to ensure Quality of Service (QoS) for latency-critical
tasks, degrading overall system throughput. We explore
the causes of this variation and exploit the
opportunities of mitigating variation directly to
simultaneously improve both QoS and utilization. We
develop, implement, and evaluate Dirigent, a
lightweight performance-management runtime system that
accurately controls the QoS of latency-critical
applications at fine time scales, leveraging existing
architecture mechanisms. We evaluate Dirigent on a real
machine and show that it is significantly more
effective than configurations representative of prior
schemes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Kuperman:2016:PR,
author = "Yossi Kuperman and Eyal Moscovici and Joel Nider and
Razya Ladelsky and Abel Gordon and Dan Tsafrir",
title = "Paravirtual Remote {I/O}",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "49--65",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872378",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The traditional ``trap and emulate'' I/O
paravirtualization model conveniently allows for I/O
interposition, yet it inherently incurs costly
guest-host context switches. The newer ``sidecore''
model eliminates this overhead by dedicating host
(side)cores to poll the relevant guest memory regions
and react accordingly without context switching. But
the dedication of sidecores on each host might be
wasteful when I/O activity is low, or it might not
provide enough computational power when I/O activity is
high. We propose to alleviate this problem at rack
scale by consolidating the dedicated sidecores spread
across several hosts onto one server. The hypervisor is
then effectively split into two parts: the local
hypervisor that hosts the VMs, and the remote
hypervisor that processes their paravirtual I/O. We
call this model vRIO---paraVirtual Remote I/O. We find
that by increasing the latency somewhat, it provides
comparable throughput with fewer sidecores and superior
throughput with the same number of sidecores as
compared to the state of the art. vRIO additionally
constitutes a new, cost-effective way to consolidate
I/O devices (on the remote hypervisor) while supporting
efficient programmable I/O interposition.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Kaufmann:2016:HPP,
author = "Antoine Kaufmann and Simon Peter and Naveen Kr. Sharma
and Thomas Anderson and Arvind Krishnamurthy",
title = "High Performance Packet Processing with {FlexNIC}",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "67--81",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872367",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The recent surge of network I/O performance has put
                 enormous pressure on memory and software I/O processing
                 subsystems. We argue that the primary reason for high
memory and processing overheads is the inefficient use
of these resources by current commodity network
interface cards (NICs). We propose FlexNIC, a flexible
network DMA interface that can be used by operating
systems and applications alike to reduce packet
processing overheads. FlexNIC allows services to
install packet processing rules into the NIC, which
then executes simple operations on packets while
exchanging them with host memory. Thus, our proposal
moves some of the packet processing traditionally done
in software to the NIC, where it can be done flexibly
and at high speed. We quantify the potential benefits
of FlexNIC by emulating the proposed FlexNIC
functionality with existing hardware or in software. We
show that significant gains in application performance
are possible, in terms of both latency and throughput,
for several widely used applications, including a
key-value store, a stream processing system, and an
intrusion detection system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
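FlexNIC's key mechanism is letting software install match-action
rules into the NIC. A minimal sketch of that idea in C, assuming a
hypothetical flexnic_install_rule() interface (the struct layout and
function are illustrative stand-ins, not the paper's actual API):

    /* Hypothetical FlexNIC-style match-action rule sketch. */
    #include <stdint.h>
    #include <stdio.h>

    struct match {
        uint8_t  ip_proto;   /* e.g., 17 for UDP */
        uint16_t dst_port;   /* match on destination port */
    };

    struct action {
        enum { STEER_TO_QUEUE, DROP } kind;
        int queue;           /* per-core receive queue index */
    };

    /* Stub standing in for the NIC driver call. */
    static int flexnic_install_rule(const struct match *m,
                                    const struct action *a)
    {
        printf("rule: proto=%u port=%u -> queue %d\n",
               (unsigned)m->ip_proto, (unsigned)m->dst_port, a->queue);
        return 0;
    }

    int main(void)
    {
        /* Steer key-value-store requests (UDP port 11211) to queue 3,
         * so the NIC, not software, performs the demultiplexing. */
        struct match m = { .ip_proto = 17, .dst_port = 11211 };
        struct action a = { .kind = STEER_TO_QUEUE, .queue = 3 };
        return flexnic_install_rule(&m, &a);
    }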
@Article{Bornholt:2016:SCF,
author = "James Bornholt and Antoine Kaufmann and Jialin Li and
Arvind Krishnamurthy and Emina Torlak and Xi Wang",
title = "Specifying and Checking File System Crash-Consistency
Models",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "83--98",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872406",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Applications depend on persistent storage to recover
state after system crashes. But the POSIX file system
interfaces do not define the possible outcomes of a
crash. As a result, it is difficult for application
writers to correctly understand the ordering of and
dependencies between file system operations, which can
lead to corrupt application state and, in the worst
case, catastrophic data loss. This paper presents
crash-consistency models, analogous to memory
consistency models, which describe the behavior of a
file system across crashes. Crash-consistency models
include both litmus tests, which demonstrate allowed
and forbidden behaviors, and axiomatic and operational
specifications. We present a formal framework for
developing crash-consistency models, and a toolkit,
called Ferrite, for validating those models against
real file system implementations. We develop a
crash-consistency model for ext4, and use Ferrite to
demonstrate unintuitive crash behaviors of the ext4
implementation. To demonstrate the utility of
crash-consistency models to application writers, we use
our models to prototype proof-of-concept verification
and synthesis tools, as well as new library interfaces
for crash-safe applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
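A crash litmus test of the kind the abstract describes is a small
program plus a question about which post-crash states are allowed.
The classic atomic-replace-via-rename pattern below is illustrative
(it is in the spirit of the paper, not quoted from it):

    /* Crash litmus test sketch: atomic replace via rename.
     * The question a crash-consistency model answers: after a
     * crash, can "config" exist under the new name but with old or
     * empty contents? POSIX alone does not say; ext4's answer
     * depends on mount options. The fsync() below is what many
     * applications omit, assuming ordering that is not guaranteed. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("config.tmp", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0) return 1;
        write(fd, "new-contents\n", 13);
        fsync(fd);                  /* persist data before the rename... */
        close(fd);
        rename("config.tmp", "config"); /* ...then atomically replace */
        /* A crash between write() and rename() must leave the old
         * file; without the fsync(), some allowed outcomes expose an
         * empty "config". */
        return 0;
    }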
@Article{Prasad:2016:PMR,
author = "Aravinda Prasad and K. Gopinath",
title = "Prudent Memory Reclamation in Procrastination-Based
Synchronization",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "99--112",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872405",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Procrastination is the fundamental technique used in
synchronization mechanisms such as Read-Copy-Update
(RCU) where writers, in order to synchronize with
readers, defer the freeing of an object until there are
no readers referring to the object. The synchronization
mechanism determines when the deferred object is safe
to reclaim and when it is actually reclaimed. Hence,
such memory reclamations are completely oblivious of
the memory allocator state. This induces poor memory
allocator performance, for instance, when the
reclamations are ill-timed. Furthermore, deferred
                 objects provide hints about the future, indicating
                 which memory regions are about to be freed. Although
useful, hints are not exploited as deferred objects are
not visible to memory allocators. We introduce
                 Prudence, a dynamic memory allocator that is tightly
integrated with the synchronization mechanism to ensure
visibility of deferred objects to the memory allocator.
Such an integration enables Prudence to (i) identify
the safe time to reclaim deferred objects' memory, (ii)
have an inclusive view of the allocated, free and
about-to-be-freed objects, and (iii) exploit
optimizations based on the hints about the future
during important state transitions. Our evaluation in
the Linux kernel shows that Prudence integrated with
                 RCU performs 3.9x to 28x better in micro-benchmarks
compared to SLUB, a recent memory allocator in the
Linux kernel. It also improves the overall performance
perceptibly (4\%-18\%) for a mix of widely used
synthetic and application benchmarks. Further, it
performs better (up to 98\%) in terms of object hits in
caches, object cache churns, slab churns, peak memory
usage and total fragmentation, when compared with the
SLUB allocator.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
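The abstract's core observation is that deferred (procrastinated)
frees are invisible to the allocator. A minimal single-threaded C
sketch of deferral plus an allocator hint; allocator_hint_pending_free()
is a hypothetical stand-in for the Prudence-style integration:

    #include <stdio.h>
    #include <stdlib.h>

    struct deferred { void *ptr; struct deferred *next; };
    static struct deferred *pending;

    static void allocator_hint_pending_free(void *p) /* hypothetical */
    {
        printf("allocator told %p will be freed soon\n", p);
    }

    static void defer_free(void *p)  /* writer: procrastinate the free */
    {
        struct deferred *d = malloc(sizeof *d);
        d->ptr = p; d->next = pending; pending = d;
        allocator_hint_pending_free(p); /* visible, not oblivious */
    }

    static void grace_period_end(void) /* no readers: safe to reclaim */
    {
        while (pending) {
            struct deferred *d = pending; pending = d->next;
            free(d->ptr); free(d);
        }
    }

    int main(void)
    {
        defer_free(malloc(64));
        grace_period_end();
        return 0;
    }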
@Article{Mukkara:2016:WID,
author = "Anurag Mukkara and Nathan Beckmann and Daniel
Sanchez",
title = "{Whirlpool}: Improving Dynamic Cache Management with
Static Data Classification",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "113--127",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872363",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cache hierarchies are increasingly non-uniform and
difficult to manage. Several techniques, such as
scratchpads or reuse hints, use static information
about how programs access data to manage the memory
hierarchy. Static techniques are effective on regular
programs, but because they set fixed policies, they are
vulnerable to changes in program behavior or available
cache space. Instead, most systems rely on dynamic
caching policies that adapt to observed program
behavior. Unfortunately, dynamic policies spend
significant resources trying to learn how programs use
memory, and yet they often perform worse than a static
policy. We present Whirlpool, a novel approach that
combines static information with dynamic policies to
reap the benefits of each. Whirlpool statically
classifies data into pools based on how the program
uses memory. Whirlpool then uses dynamic policies to
tune the cache to each pool. Hence, rather than setting
policies statically, Whirlpool uses static analysis to
guide dynamic policies. We present both an API that
lets programmers specify pools manually and a profiling
tool that discovers pools automatically in unmodified
binaries. We evaluate Whirlpool on a state-of-the-art
NUCA cache. Whirlpool significantly outperforms prior
approaches: on sequential programs, Whirlpool improves
performance by up to 38\% and reduces data movement
energy by up to 53\%; on parallel programs, Whirlpool
improves performance by up to 67\% and reduces data
movement energy by up to 2.6x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
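The abstract mentions an API for manually classifying data into
pools. A hypothetical sketch of such an interface (pool_create and
pool_malloc are illustrative names, not necessarily the paper's):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct pool { const char *name; } pool_t;

    static pool_t *pool_create(const char *name)      /* stub */
    {
        pool_t *p = malloc(sizeof *p);
        p->name = name;
        return p;
    }

    static void *pool_malloc(pool_t *p, size_t bytes) /* stub */
    {
        printf("alloc %zu bytes from pool %s\n", bytes, p->name);
        return malloc(bytes);
    }

    int main(void)
    {
        pool_t *index_pool  = pool_create("index");  /* hot, reused */
        pool_t *stream_pool = pool_create("stream"); /* scanned once */

        void *tree = pool_malloc(index_pool, 1 << 20);
        void *buf  = pool_malloc(stream_pool, 64 << 20);
        /* A Whirlpool-style runtime would now tune cache placement
         * per pool: keep "index" near the cores, stream the rest. */
        free(tree); free(buf);
        return 0;
    }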
@Article{Jeon:2016:TTD,
author = "Myeongjae Jeon and Yuxiong He and Hwanju Kim and Sameh
Elnikety and Scott Rixner and Alan L. Cox",
title = "{TPC}: Target-Driven Parallelism Combining Prediction
and Correction to Reduce Tail Latency in Interactive
Services",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "129--141",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872370",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In interactive services such as web search,
recommendations, games and finance, reducing the tail
latency is crucial to provide fast response to every
user. Using web search as a driving example, we
systematically characterize interactive workload to
identify the opportunities and challenges for reducing
tail latency. We find that the workload consists of
mainly short requests that do not benefit from
parallelism, and a few long requests which
significantly impact the tail but exhibit high
parallelism speedup. This motivates estimating request
execution time, using a predictor, to identify long
requests and to parallelize them. Prediction, however,
is not perfect; a long request mispredicted as short is
likely to contribute to the server tail latency,
setting a ceiling on the achievable tail latency. We
propose TPC, an approach that combines prediction
information judiciously with dynamic correction for
inaccurate prediction. Dynamic correction increases
parallelism to accelerate a long request that is
mispredicted as short. TPC carefully selects the
appropriate target latencies based on system load and
parallelism efficiency to reduce tail latency. We
implement TPC and several prior approaches to compare
them experimentally on a single search server and on a
cluster of 40 search servers. The experimental results
show that TPC reduces the 99th- and 99.9th-percentile
latency by up to 40\% compared with the best prior
work. Moreover, we evaluate TPC on a finance server,
demonstrating its effectiveness on reducing tail
latency of interactive services beyond web search.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
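The predict-then-correct control the abstract describes can be
summarized in a few lines: predicted-long requests are parallelized
immediately, while predicted-short ones run cheaply but are promoted
if they overrun a correction deadline. A minimal sketch; thresholds,
degrees, and the parallelize() hook are illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    #define LONG_THRESHOLD_MS 20 /* predicted service-time cutoff */
    #define CORRECT_AFTER_MS  10 /* promote mispredicted requests */

    struct request { int id; double predicted_ms; double elapsed_ms; };

    static void parallelize(struct request *r, int degree)
    {
        printf("request %d -> parallelism %d\n", r->id, degree);
    }

    static void schedule(struct request *r)
    {
        if (r->predicted_ms >= LONG_THRESHOLD_MS)
            parallelize(r, 4);   /* predicted long: go wide now */
        else
            parallelize(r, 1);   /* predicted short: stay cheap */
    }

    static void tick(struct request *r) /* called as the request runs */
    {
        bool predicted_short = r->predicted_ms < LONG_THRESHOLD_MS;
        if (predicted_short && r->elapsed_ms >= CORRECT_AFTER_MS)
            parallelize(r, 4);   /* correction: it was long after all */
    }

    int main(void)
    {
        struct request r = { 1, 5.0, 0.0 };
        schedule(&r);
        r.elapsed_ms = 12.0;     /* misprediction shows up at runtime */
        tick(&r);
        return 0;
    }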
@Article{Brown:2016:HBS,
author = "Fraser Brown and Andres N{\"o}tzli and Dawson Engler",
title = "How to Build Static Checking Systems Using Orders of
Magnitude Less Code",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "143--157",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872364",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern static bug finding tools are complex. They
typically consist of hundreds of thousands of lines of
code, and most of them are wedded to one language (or
even one compiler). This complexity makes the systems
hard to understand, hard to debug, and hard to retarget
to new languages, thereby dramatically limiting their
scope. This paper reduces checking system complexity by
addressing a fundamental assumption, the assumption
that checkers must depend on a full-blown language
specification and compiler front end. Instead, our
program checkers are based on drastically incomplete
language grammars (``micro-grammars'') that describe
only portions of a language relevant to a checker. As a
                 result, our implementation is tiny---roughly 2500 lines
of code, about two orders of magnitude smaller than a
typical system. We hope that this dramatic increase in
simplicity will allow people to use more checkers on
more systems in more languages. We implement our
                 approach in $\mu$chex, a language-agnostic framework
for writing static bug checkers. We use it to build
micro-grammar based checkers for six languages (C, the
C preprocessor, C++, Java, JavaScript, and Dart) and
find over 700 errors in real-world projects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
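A micro-grammar checker recognizes only the language fragments it
cares about and skips everything else. A toy C sketch in the same
spirit (an illustration of the idea, not the $\mu$chex
implementation): flag a likely assignment-in-condition by matching a
tiny "if (<ident> = ...)" pattern over raw text:

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    static void check_line(const char *line, int lineno)
    {
        const char *p = strstr(line, "if");
        if (!p) return;
        p = strchr(p, '(');
        if (!p) return;
        p++;                               /* skip '(' */
        while (isspace((unsigned char)*p)) p++;
        while (isalnum((unsigned char)*p) || *p == '_') p++;
        while (isspace((unsigned char)*p)) p++;
        /* A single '=' (not '==') after the identifier is suspicious. */
        if (*p == '=' && p[1] != '=')
            printf("line %d: assignment in condition?\n", lineno);
    }

    int main(void)
    {
        check_line("if (x = 5) return;", 1);  /* flagged */
        check_line("if (x == 5) return;", 2); /* ok */
        return 0;
    }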
@Article{Zhang:2016:TED,
author = "Tong Zhang and Dongyoon Lee and Changhee Jung",
title = "{TxRace}: Efficient Data Race Detection Using
Commodity Hardware Transactional Memory",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "159--173",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872384",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Detecting data races is important for debugging
shared-memory multithreaded programs, but the high
runtime overhead prevents the wide use of dynamic data
race detectors. This paper presents TxRace, a new
software data race detector that leverages commodity
hardware transactional memory (HTM) to speed up data
race detection. TxRace instruments a multithreaded
program to transform synchronization-free regions into
transactions, and exploits the conflict detection
mechanism of HTM for lightweight data race detection at
runtime. However, the limitations of the current
best-effort commodity HTMs expose several challenges in
using them for data race detection: (1) lack of ability
to pinpoint racy instructions, (2) false positives
caused by cache line granularity of conflict detection,
and (3) transactional aborts for non-conflict reasons
(e.g., capacity or unknown). To overcome these
challenges, TxRace performs lightweight HTM-based data
race detection at first, and occasionally switches to
slow yet precise data race detection only for the small
fraction of execution intervals in which potential
races are reported by HTM. According to the
experimental results, TxRace reduces the average
runtime overhead of dynamic data race detection from
11.68x to 4.65x with only a small number of false
negatives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
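TxRace's fast path wraps synchronization-free regions in hardware
transactions so HTM conflict detection flags potential races. A
minimal sketch using Intel RTM intrinsics (compile with -mrtm on
TSX-capable hardware); the slow, precise detector is stubbed out:

    #include <immintrin.h>
    #include <stdio.h>

    static int shared;

    static void slow_precise_detection(void)
    {
        printf("abort: re-run region under a precise detector\n");
    }

    static void region(void)
    {
        unsigned status = _xbegin();
        if (status == _XBEGIN_STARTED) {
            shared++;      /* sync-free region runs as a transaction */
            _xend();       /* committed: no conflict evidence here */
        } else {
            slow_precise_detection(); /* conflicted or aborted */
        }
    }

    int main(void)
    {
        region();
        printf("shared = %d\n", shared);
        return 0;
    }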
@Article{Amani:2016:CVH,
author = "Sidney Amani and Alex Hixon and Zilin Chen and
Christine Rizkallah and Peter Chubb and Liam O'Connor
and Joel Beeren and Yutaka Nagashima and Japheth Lim
and Thomas Sewell and Joseph Tuong and Gabriele Keller
and Toby Murray and Gerwin Klein and Gernot Heiser",
title = "{CoGENT}: Verifying High-Assurance File System
Implementations",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "175--188",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872404",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an approach to writing and formally
verifying high-assurance file-system code in a
restricted language called COGENT, supported by a
certifying compiler that produces C code, high-level
specification of COGENT, and translation correctness
proofs. The language is strongly typed and guarantees
absence of a number of common file system
implementation errors. We show how verification effort
is drastically reduced for proving higher-level
properties of the file system implementation by
reasoning about the generated formal specification
rather than its low-level C code. We use the framework
to write two Linux file systems, and compare their
performance with their native C implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Asmussen:2016:MHO,
author = "Nils Asmussen and Marcus V{\"o}lp and Benedikt
N{\"o}then and Hermann H{\"a}rtig and Gerhard
Fettweis",
title = "{M3}: a Hardware\slash Operating-System Co-Design to
Tame Heterogeneous Manycores",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "189--203",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872371",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the last decade, the number of available cores
increased and heterogeneity grew. In this work, we ask
the question whether the design of the current
operating systems (OSes) is still appropriate if these
trends continue and lead to abundantly available but
heterogeneous cores, or whether it forces a fundamental
rethinking of how systems are designed. We argue that:
1. hiding heterogeneity behind a common hardware
interface unifies, to a large extent, the control and
coordination of cores and accelerators in the OS, 2.
isolating at the network-on-chip rather than with
processor features (like privileged mode, memory
management unit, ...), allows running untrusted code on
arbitrary cores, and 3. providing OS services via
protocols over the network-on-chip, instead of via
system calls, makes them accessible to arbitrary types
of cores as well. In summary, this turns accelerators
into first-class citizens and enables a single and
convenient programming environment for all cores
without the need to trust any application. In this
paper, we introduce network-on-chip-level isolation,
present the design of our microkernel-based OS, M3, and
the common hardware interface, and evaluate the
                 performance of our prototype in comparison to Linux.
                 Somewhat surprisingly, even without using accelerators, M3
outperforms Linux in some application-level benchmarks
by more than a factor of five.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Liaqat:2016:SEE,
author = "Daniyal Liaqat and Silviu Jingoi and Eyal de Lara and
Ashvin Goel and Wilson To and Kevin Lee and Italo {De
Moraes Garcia} and Manuel Saldana",
title = "{Sidewinder}: an Energy Efficient and Developer
Friendly Heterogeneous Architecture for Continuous
Mobile Sensing",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "205--215",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872398",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Applications that perform continuous sensing on mobile
phones have the potential to revolutionize everyday
life. Examples range from medical and health monitoring
applications, such as pedometers and fall detectors, to
participatory sensing applications, such as noise
pollution, traffic and seismic activity monitoring.
Unfortunately, current mobile devices are a poor match
for continuous sensing applications as they require the
device to remain awake for extended periods of time,
resulting in poor battery life. This paper presents
Sidewinder, a new approach towards offloading sensor
data processing to a low-power processor and waking up
the main processor when events of interest occur. This
approach differs from other heterogeneous architectures
in that developers are presented with a programming
interface that lets them construct application specific
wake-up conditions by linking together and
parameterizing predefined sensor data processing
algorithms. Our experiments indicate performance that
is comparable to approaches that provide fully
programmable offloading, but do so with a much simpler
programming interface that facilitates deployment and
portability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
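Sidewinder's programming model chains parameterized, predefined
sensor-processing stages into a wake-up condition that runs on the
low-power core. A minimal sketch; the stage names, parameters, and
trace are illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    /* Predefined stages a developer links and parameterizes. */
    static double lowpass(double x)
    { static double s; s = 0.9 * s + 0.1 * x; return s; }
    static bool threshold(double x, double cut) { return x > cut; }

    static bool wake_condition(double accel_sample)
    {
        /* chain: low-pass filter -> threshold at 1.5g */
        return threshold(lowpass(accel_sample), 1.5);
    }

    int main(void)
    {
        double trace[] = { 0.1, 0.2, 3.0, 3.2, 3.1, 3.3, 3.2, 3.4,
                           3.3, 3.5, 3.4, 3.6, 3.5, 3.7, 3.6, 3.8 };
        for (int i = 0; i < 16; i++)
            if (wake_condition(trace[i])) {
                printf("wake main processor at sample %d\n", i);
                break;
            }
        return 0;
    }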
@Article{Balkind:2016:OOS,
author = "Jonathan Balkind and Michael McKeown and Yaosheng Fu
and Tri Nguyen and Yanqi Zhou and Alexey Lavrov and
Mohammad Shahrad and Adi Fuchs and Samuel Payne and
Xiaohua Liang and Matthew Matl and David Wentzlaff",
title = "{OpenPiton}: an Open Source Manycore Research
Framework",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "217--232",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872414",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/gnu.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Industry is building larger, more complex, manycore
processors on the back of strong institutional
knowledge, but academic projects face difficulties in
replicating that scale. To alleviate these difficulties
and to develop and share knowledge, the community needs
open architecture frameworks for simulation, synthesis,
and software exploration which support extensibility,
scalability, and configurability, alongside an
established base of verification tools and supported
software. In this paper we present OpenPiton, an open
source framework for building scalable architecture
research prototypes from 1 core to 500 million cores.
OpenPiton is the world's first open source,
general-purpose, multithreaded manycore processor and
framework. OpenPiton leverages the industry hardened
OpenSPARC T1 core with modifications and builds upon it
with a scratch-built, scalable uncore creating a
flexible, modern manycore design. In addition,
OpenPiton provides synthesis and backend scripts for
ASIC and FPGA to enable other researchers to bring
their designs to implementation. OpenPiton provides a
complete verification infrastructure of over 8000
tests, is supported by mature software tools, runs
full-stack multiuser Debian Linux, and is written in
industry standard Verilog. Multiple implementations of
OpenPiton have been created including a taped-out
25-core implementation in IBM's 32nm process and
multiple Xilinx FPGA prototypes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Lustig:2016:CVM,
author = "Daniel Lustig and Geet Sethi and Margaret Martonosi
and Abhishek Bhattacharjee",
title = "{COATCheck}: Verifying Memory Ordering at the
Hardware-{OS} Interface",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "233--247",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872399",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern computer systems include numerous compute
elements, from CPUs to GPUs to accelerators. Harnessing
their full potential requires well-defined,
properly-implemented memory consistency models (MCMs),
and low-level system functionality such as virtual
memory and address translation (AT). Unfortunately, it
is difficult to specify and implement hardware-OS
interactions correctly; in the past, many hardware and
OS specification mismatches have resulted in
implementation bugs in commercial processors. In an
effort to resolve this verification gap, this paper
makes the following contributions. First, we present
COATCheck, an address translation-aware framework for
specifying and statically verifying memory ordering
enforcement at the microarchitecture and operating
system levels. We develop a domain-specific language
for specifying ordering enforcement, for including
ordering-related OS events and hardware
micro-operations, and for programmatically enumerating
happens-before graphs. Using a fast and automated
static constraint solver, COATCheck can efficiently
analyze interesting and important memory ordering
scenarios for modern, high-performance, out-of-order
processors. Second, we show that previous work on
Virtual Address Memory Consistency (VAMC) does not
capture every translation-related ordering scenario of
interest, and that some such cases even fall outside
the traditional scope of consistency. We therefore
introduce the term transistency model to describe the
superset of consistency which captures all
translation-aware sets of ordering rules.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
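The axiomatic core behind enumerating happens-before graphs, as the
abstract describes, is that an outcome is forbidden exactly when the
ordering relation is cyclic. A tiny DFS cycle check over an
illustrative three-event graph (the events and edges are invented
for illustration; COATCheck's DSL and solver are far richer):

    #include <stdbool.h>
    #include <stdio.h>

    #define N 3
    static bool edge[N][N];
    static int  state[N];    /* 0=unvisited 1=on-stack 2=done */

    static bool cyclic(int v)
    {
        state[v] = 1;
        for (int w = 0; w < N; w++)
            if (edge[v][w]) {
                if (state[w] == 1) return true;
                if (state[w] == 0 && cyclic(w)) return true;
            }
        state[v] = 2;
        return false;
    }

    int main(void)
    {
        /* events: 0 = store, 1 = TLB-shootdown ack, 2 = load */
        edge[0][1] = edge[1][2] = true;  /* ordering edges */
        edge[2][0] = true;               /* observed-value edge */
        for (int v = 0; v < N; v++)
            if (state[v] == 0 && cyclic(v)) {
                printf("cycle: this execution is forbidden\n");
                return 0;
            }
        printf("acyclic: execution allowed\n");
        return 0;
    }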
@Article{Markuze:2016:TIP,
author = "Alex Markuze and Adam Morrison and Dan Tsafrir",
title = "True {IOMMU} Protection from {DMA} Attacks: When Copy
is Faster than Zero Copy",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "249--262",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872379",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Malicious I/O devices might compromise the OS using
DMAs. The OS therefore utilizes the IOMMU to map and
unmap every target buffer right before and after its
DMA is processed, thereby restricting DMAs to their
designated locations. This usage model, however, is not
truly secure for two reasons: (1) it provides
protection at page granularity only, whereas DMA
buffers can reside on the same page as other data; and
(2) it delays DMA buffer unmaps due to performance
considerations, creating a vulnerability window in
which devices can access in-use memory. We propose that
OSes utilize the IOMMU differently, in a manner that
eliminates these two flaws. Our new usage model
restricts device access to a set of shadow DMA buffers
that are never unmapped, and it copies DMAed data
to/from these buffers, thus providing sub-page
protection while eliminating the aforementioned
vulnerability window. Our key insight is that the cost
                 of interacting with, and synchronizing access to, the
                 slow IOMMU hardware---required for zero-copy protection
                 against devices---makes copying preferable to
zero-copying. We implement our model in Linux and
evaluate it with standard networking benchmarks
                 utilizing a 40 Gb/s NIC. We demonstrate that despite
being more secure than the safest preexisting usage
model, our approach provides up to 5x higher
throughput. Additionally, whereas it is inherently less
scalable than an IOMMU-less (unprotected) system, our
approach incurs only 0\%--25\% performance degradation
in comparison.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
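The proposed usage model keeps a set of permanently mapped shadow
DMA buffers and copies data between them and the OS's real buffers,
giving sub-page protection with no per-DMA map/unmap. A user-space
sketch of the copy-through-shadow idea (names and the simulated
device are illustrative):

    #include <stdio.h>
    #include <string.h>

    #define SHADOW_BYTES 2048

    /* The only region the IOMMU ever maps for the device. */
    static char shadow_rx[SHADOW_BYTES];

    /* Device "DMA" lands in the shadow buffer (simulated here). */
    static int device_receive(void)
    {
        const char pkt[] = "payload";
        memcpy(shadow_rx, pkt, sizeof pkt);
        return sizeof pkt;
    }

    /* The OS copies out: real buffers are never device-visible, so
     * a malicious device cannot touch co-located or in-use data. */
    static void rx_copy_out(char *kbuf, int len)
    {
        memcpy(kbuf, shadow_rx, len);
    }

    int main(void)
    {
        char kbuf[SHADOW_BYTES];
        int n = device_receive();
        rx_copy_out(kbuf, n);
        printf("received %d bytes: %s\n", n, kbuf);
        return 0;
    }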
@Article{Awad:2016:SSZ,
author = "Amro Awad and Pratyusa Manadhata and Stuart Haber and
Yan Solihin and William Horne",
title = "Silent Shredder: Zero-Cost Shredding for Secure
Non-Volatile Main Memory Controllers",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "263--276",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872377",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As non-volatile memory (NVM) technologies are expected
to replace DRAM in the near future, new challenges have
emerged. For example, NVMs have slow and
power-consuming writes, and limited write endurance. In
addition, NVMs have a data remanence vulnerability,
i.e., they retain data for a long time after being
powered off. NVM encryption alleviates the
vulnerability, but exacerbates the limited endurance by
increasing the number of writes to memory. We observe
that, in current systems, a large percentage of main
memory writes result from data shredding in operating
systems, a process of zeroing out physical pages before
mapping them to new processes, in order to protect
previous processes' data. In this paper, we propose
Silent Shredder, which repurposes initialization
vectors used in standard counter mode encryption to
completely eliminate the data shredding writes. Silent
Shredder also speeds up reading shredded cache lines,
and hence reduces power consumption and improves
overall performance. To evaluate our design, we run
three PowerGraph applications and 26 multi-programmed
workloads from the SPEC 2006 suite, on a gem5-based
full system simulator. Silent Shredder eliminates an
average of 48.6\% of the writes in the initialization
and graph construction phases. It speeds up main memory
reads by 3.3 times, and improves the number of
instructions per cycle (IPC) by 6.4\% on average.
Finally, we discuss several use cases, including
virtual machines' data isolation and user-level large
data initialization, where Silent Shredder can be used
effectively at no extra cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
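In counter-mode memory encryption, data is XORed with a keyed pad
derived from a per-line initialization vector, so bumping the IV
renders the old ciphertext undecipherable: the line is "shredded"
without writing zeros. A toy sketch; pad() is a stand-in mixer for
the AES-over-IV computation real controllers use:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pad(uint64_t key, uint64_t iv)
    {
        /* Stand-in keyed PRF, not AES; illustration only. */
        uint64_t x = key ^ (iv * 0x9E3779B97F4A7C15ULL);
        x ^= x >> 33; x *= 0xFF51AFD7ED558CCDULL; x ^= x >> 33;
        return x;
    }

    int main(void)
    {
        uint64_t key = 42, iv = 1;
        uint64_t plain  = 0xDEADBEEF;
        uint64_t stored = plain ^ pad(key, iv);   /* encrypted write */

        printf("read : %#llx\n",
               (unsigned long long)(stored ^ pad(key, iv)));
        iv++; /* shred: no data write, old ciphertext is now garbage */
        printf("after: %#llx\n",
               (unsigned long long)(stored ^ pad(key, iv)));
        return 0;
    }

Silent Shredder goes further, having reads of shredded lines return
zeros without touching memory; the sketch shows only the IV-bump
mechanism that makes the old contents unrecoverable.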
@Article{Kwon:2016:SPT,
author = "Youngjin Kwon and Alan M. Dunn and Michael Z. Lee and
Owen S. Hofmann and Yuanzhong Xu and Emmett Witchel",
title = "{Sego}: Pervasive Trusted Metadata for Efficiently
Verified Untrusted System Services",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "277--290",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872372",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Sego is a hypervisor-based system that gives strong
privacy and integrity guarantees to trusted
applications, even when the guest operating system is
compromised or hostile. Sego verifies operating system
services, like the file system, instead of replacing
them. By associating trusted metadata with user data
across all system devices, Sego verifies system
services more efficiently than previous systems,
especially services that depend on data contents. We
extensively evaluate Sego's performance on real
workloads and implement a kernel fault injector to
validate Sego's file system-agnostic crash consistency
and recovery protocol.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Tsafrir:2016:SAW,
author = "Dan Tsafrir",
title = "Synopsis of the {ASPLOS '16 Wild and Crazy Ideas
(WACI)} Invited-Speakers Session",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "291--294",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2876512",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Wild and Crazy Ideas (WACI) session is a
longstanding tradition at ASPLOS, soliciting talks that
consist of forward-looking, visionary, inspiring,
creative, far out or just plain amazing ideas presented
in an exciting way. (Amusing elements in the
presentations are tolerated ;-) but are in fact
optional.) The first WACI session took place in 1998.
Back then, the call for talks included a problem
statement, which contended that ``papers usually do not
get admitted to [such conferences as] ISCA or ASPLOS
unless the systems that they describe are mature enough
to run [some standard benchmark suites, which] has a
chilling effect on the idea generation
process---encouraging incremental research'' [1]. The
1998 WACI session turned out to be a great success. Its
webpage states that ``there were 42 submissions
[competing over] only eight time slots, [which resulted
in] this session [having] a lower acceptance rate than
the conference itself'' [2]. But the times they are
a-changin' [3], and the WACI session no longer enjoys
that many submissions (Figure 1), perhaps because
nowadays there exist many forums for researchers to
describe/discuss their preliminary ideas, including:
the ``hot topics in'' workshops [4--7]; a journal like
CAL, dedicated to early results [8]; main conferences
soliciting short submissions describing ``original or
unconventional ideas at a preliminary stage'' in
addition to regular papers [9]; and the many workshops
co-located with main conferences, like ISCA '15, which
hosted thirteen such workshops [10]. Regardless of the
reason for the declining number of submissions, this
time we've decided to organize the WACI session
differently to ensure its continued high quality.
Instead of soliciting talks via an open call and hoping
                 for the best, we proactively invited speakers who we
believe are capable of delivering excellent WACI
presentations. That is, this year's WACI session
consists exclusively of invited speakers. Filling up
the available slots turned out to be fairly easy, as
most of the researchers we invited promptly accepted
our invitation. The duration of each talk was set to be
eight minutes (exactly as in the first WACI session
from 1998) plus two minutes for questions. The talks
are outlined below. We believe they are interesting and
exciting, and we hope the attendees of the session will
find them stimulating and insightful.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Williams:2016:BIC,
author = "R. Stanley Williams",
title = "Brain Inspired Computing",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "295--295",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872417",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Phothilimthana:2016:SS,
author = "Phitchaya Mangpo Phothilimthana and Aditya Thakur and
Rastislav Bodik and Dinakar Dhurjati",
title = "Scaling up Superoptimization",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "297--310",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872387",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Developing a code optimizer is challenging, especially
for new, idiosyncratic ISAs. Superoptimization can, in
principle, discover machine-specific optimizations
automatically by searching the space of all instruction
sequences. If we can increase the size of code
fragments a superoptimizer can optimize, we will be
able to discover more optimizations. We develop LENS, a
search algorithm that increases the size of code a
superoptimizer can synthesize by rapidly pruning away
invalid candidate programs. Pruning is achieved by
selectively refining the abstraction under which
candidates are considered equivalent, only in the
promising part of the candidate space. LENS also uses a
bidirectional search strategy to prune the candidate
space from both forward and backward directions. These
pruning strategies allow LENS to solve twice as many
benchmarks as existing enumerative search algorithms,
while LENS is about 11-times faster. Additionally, we
increase the effective size of the superoptimized
fragments by relaxing the correctness condition using
contexts (surrounding code). Finally, we combine LENS
with complementary search techniques into a cooperative
superoptimizer, which exploits the stochastic search to
make random jumps in a large candidate space, and a
symbolic (SAT-solver-based) search to synthesize
arbitrary constants. While existing superoptimizers
consistently solve 9--16 out of 32 benchmarks, the
cooperative superoptimizer solves 29 benchmarks. It can
synthesize code fragments that are up to 82\% faster
than code generated by gcc -O3 from WiBench and
MiBench.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
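Enumerative superoptimization searches the space of instruction
sequences, pruning candidates against test cases before expensive
verification. A toy enumerator over a three-op accumulator ISA,
pruned by input/output examples (this illustrates only the
enumeration core, not LENS's selective refinement or bidirectional
pruning):

    #include <stdio.h>

    enum op { ADD1, DOUBLE, NEG, NOPS };
    static const char *name[] = { "add1", "double", "neg" };

    static int step(enum op o, int acc)
    {
        switch (o) {
        case ADD1:   return acc + 1;
        case DOUBLE: return acc * 2;
        default:     return -acc;
        }
    }

    int main(void)
    {
        /* Spec as test vectors: f(1)=4, f(3)=8, i.e. f(x)=2x+2. */
        const int in[] = { 1, 3 }, out[] = { 4, 8 };
        for (int a = 0; a < NOPS; a++)
            for (int b = 0; b < NOPS; b++) {
                int ok = 1;
                for (int t = 0; t < 2 && ok; t++)
                    ok = step((enum op)b,
                              step((enum op)a, in[t])) == out[t];
                if (ok)  /* prints: found: add1; double */
                    printf("found: %s; %s\n", name[a], name[b]);
            }
        return 0;
    }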
@Article{Hasabnis:2016:LAI,
author = "Niranjan Hasabnis and R. Sekar",
title = "Lifting Assembly to Intermediate Representation: a
Novel Approach Leveraging Compilers",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "311--324",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872380",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Translating low-level machine instructions into
                 a higher-level intermediate language (IL) is one of the
central steps in many binary analysis and
instrumentation systems. Existing systems build such
translators manually. As a result, it takes a great
deal of effort to support new architectures. Even for
widely deployed architectures, full instruction sets
may not be modeled, e.g., mature systems such as
Valgrind still lack support for AVX, FMA4 and SSE4.1
for x86 processors. To overcome these difficulties, we
propose a novel approach that leverages knowledge about
instruction set semantics that is already embedded into
modern compilers such as GCC. In particular, we present
a learning-based approach for automating the
translation of assembly instructions to a compiler's
architecture-neutral IL. We present an experimental
evaluation that demonstrates the ability of our
approach to easily support many architectures (x86, ARM
and AVR), including their advanced instruction sets.
Our implementation is available as open-source
software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Muralidharan:2016:AAC,
author = "Saurav Muralidharan and Amit Roy and Mary Hall and
Michael Garland and Piyush Rai",
title = "Architecture-Adaptive Code Variant Tuning",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "325--338",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872411",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Code variants represent alternative implementations of
a computation, and are common in high-performance
libraries and applications to facilitate selecting the
most appropriate implementation for a specific
execution context (target architecture and input
dataset). Automating code variant selection typically
relies on machine learning to construct a model during
an offline learning phase that can be quickly queried
at runtime once the execution context is known. In this
paper, we define a new approach called
architecture-adaptive code variant tuning, where the
variant selection model is learned on a set of source
architectures, and then used to predict variants on a
new target architecture without having to repeat the
training process. We pose this as a multi-task learning
problem, where each source architecture corresponds to
a task; we use device features in the construction of
the variant selection model. This work explores the
effectiveness of multi-task learning and the impact of
different strategies for device feature selection. We
evaluate our approach on a set of benchmarks and a
collection of six NVIDIA GPU architectures from three
distinct generations. We achieve performance results
that are mostly comparable to the previous approach of
tuning for a single GPU architecture without having to
repeat the learning phase.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Lin:2016:SKT,
author = "Xiaofeng Lin and Yu Chen and Xiaodong Li and Junjie
Mao and Jiaquan He and Wei Xu and Yuanchun Shi",
title = "Scalable Kernel {TCP} Design and Implementation for
Short-Lived Connections",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "339--352",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872391",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the rapid growth of network bandwidth, increases
in CPU cores on a single machine, and application API
models demanding more short-lived connections, a
scalable TCP stack is performance-critical. Although
                 many clean-slate designs have been proposed, production
environments still call for a bottom-up parallel TCP
stack design that is backward-compatible with existing
applications. We present Fastsocket, a BSD
Socket-compatible and scalable kernel socket design,
which achieves table-level connection partition in TCP
stack and guarantees connection locality for both
                 passive and active connections. The Fastsocket
                 architecture is a ground-up partition design, from NIC interrupts
all the way up to applications, which naturally
eliminates various lock contentions in the entire
stack. Moreover, Fastsocket maintains the full
functionality of the kernel TCP stack and
BSD-socket-compatible API, and thus applications need
no modifications. Our evaluations show that Fastsocket
achieves a speedup of 20.4x on a 24-core machine under
a workload of short-lived connections, outperforming
the state-of-the-art Linux kernel TCP implementations.
When scaling up to 24 CPU cores, Fastsocket increases
the throughput of Nginx and HAProxy by 267\% and 621\%
respectively compared with the base Linux kernel. We
also demonstrate that Fastsocket can achieve
scalability and preserve BSD socket API at the same
time. Fastsocket is already deployed in the production
environment of Sina WeiBo, serving 50 million daily
active users and billions of requests per day.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{ElHajj:2016:SPM,
author = "Izzat {El Hajj} and Alexander Merritt and Gerd
Zellweger and Dejan Milojicic and Reto Achermann and
Paolo Faraboschi and Wen-mei Hwu and Timothy Roscoe and
Karsten Schwan",
title = "{SpaceJMP}: Programming with Multiple Virtual Address
Spaces",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "353--368",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872366",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Memory-centric computing demands careful organization
of the virtual address space, but traditional methods
for doing so are inflexible and inefficient. If an
application wishes to address larger physical memory
than virtual address bits allow, if it wishes to
maintain pointer-based data structures beyond process
lifetimes, or if it wishes to share large amounts of
memory across simultaneously executing processes,
legacy interfaces for managing the address space are
cumbersome and often incur excessive overheads. We
propose a new operating system design that promotes
virtual address spaces to first-class citizens,
enabling process threads to attach to, detach from, and
switch between multiple virtual address spaces. Our
work enables data-centric applications to utilize vast
physical memory beyond the virtual range, represent
persistent pointer-rich data structures without special
pointer representations, and share large amounts of
memory between processes efficiently. We describe our
prototype implementations in the DragonFly BSD and
Barrelfish operating systems. We also present
programming semantics and a compiler transformation to
detect unsafe pointer usage. We demonstrate the
benefits of our work on data-intensive applications
such as the GUPS benchmark, the SAMTools genomics
workflow, and the Redis key-value store.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
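The abstract promotes address spaces to first-class objects that
threads attach to and switch between. A hypothetical sketch of what
such an interface might look like; vas_create, vas_attach, and
vas_switch are illustrative stand-ins, with stubs so the sketch
runs:

    #include <stdio.h>

    typedef int vas_t;  /* handle to a virtual address space */

    static vas_t vas_create(const char *name, int perms) /* stub */
    { (void)perms; printf("create VAS %s\n", name); return 1; }
    static int vas_attach(vas_t v)                       /* stub */
    { printf("attach VAS %d\n", v); return 0; }
    static int vas_switch(vas_t v)                       /* stub */
    { printf("switch to VAS %d\n", v); return 0; }

    int main(void)
    {
        /* A pointer-rich heap lives in its own address space; it can
         * outlive this process or be shared with others wholesale. */
        vas_t heap = vas_create("big-heap", 0600);
        vas_attach(heap);
        vas_switch(heap);  /* pointers into the heap are now valid */
        /* ... operate on persistent, pointer-based structures ... */
        vas_switch(0);     /* back to the original address space */
        return 0;
    }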
@Article{Lin:2016:MTP,
author = "Felix Xiaozhu Lin and Xu Liu",
title = "{\tt memif}: Towards Programming Heterogeneous Memory
Asynchronously",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "369--383",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872401",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To harness a heterogeneous memory hierarchy, it is
advantageous to integrate application knowledge in
guiding frequent memory move, i.e., replicating or
migrating virtual memory regions. To this end, we
present memif, a protected OS service for asynchronous,
hardware-accelerated memory move. Compared to the state
of the art --- page migration in Linux, memif incurs
low overhead and low latency; in order to do so, it not
                 only redefines the semantics of the kernel interface but
also overhauls the underlying mechanisms, including
request/completion management, race handling, and DMA
engine configuration. We implement memif in Linux for a
server-class system-on-chip that features heterogeneous
memories. Compared to the current Linux page migration,
memif reduces CPU usage by up to 15\% for small pages
and by up to 38x for large pages; in continuously
serving requests, memif has no need for request
batching and reduces latency by up to 63\%. By crafting
a small runtime atop memif, we improve the throughputs
for a set of streaming workloads by up to 33\%.
Overall, memif has opened the door to software
management of heterogeneous memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Kim:2016:NEN,
author = "Wook-Hee Kim and Jinwoong Kim and Woongki Baek and
Beomseok Nam and Youjip Won",
title = "{NVWAL}: Exploiting {NVRAM} in Write-Ahead Logging",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "385--398",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872392",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging byte-addressable non-volatile memory is
considered an alternative storage device for database
logs that require persistency and high performance. In
this work, we develop NVWAL (NVRAM Write-Ahead Logging)
for SQLite. The contribution of NVWAL consists of three
elements: (i) byte-granularity differential logging
that effectively eliminates the excessive I/O overhead
of filesystem-based logging or journaling, (ii)
transaction-aware lazy synchronization that reduces
cache synchronization overhead by two-thirds, and (iii)
user-level heap management of the NVRAM persistent WAL
structure, which reduces the overhead of managing
persistent objects. We implemented NVWAL in SQLite and
measured the performance on a Nexus 5 smartphone and an
NVRAM emulation board --- Tuna. Our performance study
shows the following: (i) the overhead of enforcing
strict ordering of NVRAM writes can be reduced via
NVRAM-aware transaction management. (ii) From the
application performance point of view, the overhead of
guaranteeing failure atomicity is negligible; the cache
                 line flush overhead accounts for only 0.8--4.6\% of
transaction execution time. Therefore, application
performance is much less sensitive to the NVRAM
performance than we expected. Decreasing the NVRAM
                 latency to roughly one-fifth (from 1942 nsec to 437 nsec),
SQLite achieves a mere 4\% performance gain (from 2517
ins/sec to 2621 ins/sec). (iii) Overall, when the write
latency of NVRAM is 2 usec, NVWAL increases SQLite
performance by at least 10x compared to that of WAL on
flash memory (from 541 ins/sec to 5812 ins/sec).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
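Byte-granularity differential logging appends only the changed bytes
of a page to the NVRAM log, ordering the payload before the commit
mark with cache-line flushes and a store fence. A strict-ordering
baseline sketch using x86 intrinsics over ordinary memory (NVWAL's
transaction-aware lazy synchronization batches exactly these
barriers; the record layout is illustrative):

    #include <emmintrin.h>   /* _mm_clflush, _mm_sfence */
    #include <stdint.h>
    #include <string.h>

    struct log_rec {
        uint32_t page;       /* which database page */
        uint16_t off, len;   /* changed byte range only, not 4 KB */
        uint8_t  data[64];
        uint8_t  valid;      /* commit mark, written last */
    };

    static void persist(const void *p, size_t n)
    {
        for (size_t i = 0; i < n; i += 64)
            _mm_clflush((const char *)p + i);
        _mm_sfence();        /* order flushes before what follows */
    }

    static void log_update(struct log_rec *r, uint32_t page,
                           uint16_t off, const void *src, uint16_t len)
    {
        r->page = page; r->off = off; r->len = len;
        memcpy(r->data, src, len);
        persist(r, sizeof *r);   /* payload durable first... */
        r->valid = 1;
        persist(&r->valid, 1);   /* ...then the commit mark */
    }

    int main(void)
    {
        static struct log_rec rec;
        log_update(&rec, 7, 128, "delta", 5);
        return 0;
    }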
@Article{Kolli:2016:HPT,
author = "Aasheesh Kolli and Steven Pelley and Ali Saidi and
Peter M. Chen and Thomas F. Wenisch",
title = "High-Performance Transactions for Persistent
Memories",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "399--411",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872381",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging non-volatile memory (NVRAM) technologies
offer the durability of disk with the
byte-addressability of DRAM. These devices will allow
software to access persistent data structures directly
                 in NVRAM using processor loads and stores; however,
ensuring consistency of persistent data across power
failures and crashes is difficult. Atomic, durable
transactions are a widely used abstraction to enforce
such consistency. Implementing transactions on NVRAM
requires the ability to constrain the order of NVRAM
writes, for example, to ensure that a transaction's log
record is complete before it is marked committed. Since
NVRAM write latencies are expected to be high,
minimizing these ordering constraints is critical for
achieving high performance. Recent work has proposed
programming interfaces to express NVRAM write ordering
constraints to hardware so that NVRAM writes may be
coalesced and reordered while preserving necessary
constraints. Unfortunately, a straightforward
implementation of transactions under these interfaces
imposes unnecessary constraints. We show how to remove
these dependencies through a variety of techniques,
notably, deferring commit until after locks are
released. We present a comprehensive analysis
contrasting two transaction designs across three NVRAM
programming interfaces, demonstrating up to 2.5x
speedup.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Guo:2016:HDI,
author = "Qing Guo and Karin Strauss and Luis Ceze and Henrique
S. Malvar",
title = "High-Density Image Storage Using Approximate Memory
Cells",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "413--426",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872413",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper proposes tailoring image encoding for an
approximate storage substrate. We demonstrate that
indiscriminately storing encoded images in approximate
memory generates unacceptable and uncontrollable
quality degradation. The key finding is that errors in
the encoded bit streams have non-uniform impact on the
decoded image quality. We develop a methodology to
determine the relative importance of encoded bits and
store them in an approximate storage substrate. The
storage cells are optimized to reduce error rate via
biasing and are tuned to meet the desired reliability
requirement via selective error correction. In a case
study with the progressive transform codec (PTC), a
precursor to JPEG XR, the proposed approximate image
storage system exhibits a 2.7x increase in density of
pixels per silicon volume under bounded error rates,
and this achievement is additive to the storage savings
of PTC compression.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Izraelevitz:2016:FAP,
author = "Joseph Izraelevitz and Terence Kelly and Aasheesh
Kolli",
title = "Failure-Atomic Persistent Memory Updates via {JUSTDO}
Logging",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "427--442",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872410",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Persistent memory invites applications to manipulate
persistent data via load and store instructions.
Because failures during updates may destroy transient
data (e.g., in CPU registers), preserving data
integrity in the presence of failures requires
failure-atomic bundles of updates. Prior failure
atomicity approaches for persistent memory entail
overheads due to logging and CPU cache flushing.
Persistent caches can eliminate the need for flushing,
but conventional logging remains complex and memory
intensive. We present the design and implementation of
JUSTDO logging, a new failure atomicity mechanism that
greatly reduces the memory footprint of logs,
simplifies log management, and enables fast parallel
recovery following failure. Crash-injection tests
confirm that JUSTDO logging preserves application data
integrity and performance evaluations show that it
improves throughput 3x or more compared with a
state-of-the-art alternative for a spectrum of
data-intensive algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
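The abstract's key simplification is logging each persistent store
just before executing it, so recovery can finish the interrupted
store and resume. A toy sketch of that discipline (it assumes
persistent caches, as the paper does, so no flushes appear; the
one-slot log and plain memory here are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    struct justdo_log { uintptr_t addr; uint64_t val; int pc; };
    static struct justdo_log jlog;   /* one slot per thread */

    static void logged_store(uint64_t *p, uint64_t v, int pc)
    {
        jlog.addr = (uintptr_t)p;    /* log the store first... */
        jlog.val  = v;
        jlog.pc   = pc;
        *p = v;                      /* ...then just do it */
    }

    static void recover(void)
    {
        /* After a crash: redo the logged store, resume at its pc. */
        if (jlog.addr)
            *(uint64_t *)jlog.addr = jlog.val;
        printf("resume at pc=%d\n", jlog.pc);
    }

    int main(void)
    {
        static uint64_t cell;
        logged_store(&cell, 7, 1);
        recover();
        return 0;
    }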
@Article{Han:2016:IMD,
author = "Jaeung Han and Seungheun Jeon and Young-ri Choi and
Jaehyuk Huh",
title = "Interference Management for Distributed Parallel
Applications in Consolidated Clusters",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "443--456",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872388",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Consolidating multiple applications on a system can
improve the overall resource utilization of data center
systems. However, such consolidation can adversely
affect the performance of some applications due to
interference caused by resource contention. Despite
many prior studies on the interference effects in
single-node systems, the interference behaviors of
distributed parallel applications have not been
investigated thoroughly. With distributed applications,
a local interference in a node can affect the whole
execution of an application spanning many nodes. This
paper studies an interference modeling methodology for
distributed applications to predict their performance
under interference effects in consolidated clusters.
This study first characterizes the effects of
interference for various distributed applications over
different interference settings, and analyzes how
diverse interference intensities on multiple nodes
affect the overall performance. Based on the
characterization, this study proposes a static
profiling-based model for interference propagation and
heterogeneity behaviors. In addition, this paper
presents use case studies of the modeling method, two
interference-aware placement techniques for
consolidated virtual clusters, which attempt to
maximize the overall throughput or to guarantee the
quality-of-service.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Maas:2016:THL,
author = "Martin Maas and Krste Asanovi{\'c} and Tim Harris and
John Kubiatowicz",
title = "{Taurus}: a Holistic Language Runtime System for
Coordinating Distributed Managed-Language
Applications",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "457--471",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872386",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many distributed workloads in today's data centers are
written in managed languages such as Java or Ruby.
Examples include big data frameworks such as Hadoop,
                 data stores such as Cassandra, or applications such as
the SOLR search engine. These workloads typically run
across many independent language runtime systems on
different nodes. This setup represents a source of
inefficiency, as these language runtime systems are
unaware of each other. For example, they may perform
Garbage Collection at times that are locally reasonable
but not in a distributed setting. We address these
problems by introducing the concept of a Holistic
Runtime System that makes runtime-level decisions for
the entire distributed application rather than locally.
We then present Taurus, a Holistic Runtime System
prototype. Taurus is a JVM drop-in replacement,
requires almost no configuration and can run unmodified
off-the-shelf Java applications. Taurus enforces
user-defined coordination policies and provides a DSL
for writing these policies. By applying Taurus to
Garbage Collection, we demonstrate the potential of
such a system and use it to explore coordination
strategies for the runtime systems of real-world
distributed applications, to improve application
performance and address tail-latencies in
latency-sensitive workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Delimitrou:2016:HRE,
author = "Christina Delimitrou and Christos Kozyrakis",
title = "{HCloud}: Resource-Efficient Provisioning in Shared
Cloud Systems",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "473--488",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872365",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cloud computing promises flexibility and high
performance for users and cost efficiency for
operators. To achieve this, cloud providers offer
instances of different sizes, both as long-term
reservations and short-term, on-demand allocations.
Unfortunately, determining the best provisioning
strategy is a complex, multi-dimensional problem that
depends on the load fluctuation and duration of
incoming jobs, and the performance unpredictability and
cost of resources. We first compare the two main
provisioning strategies (reserved and on-demand
resources) on Google Compute Engine (GCE) using three
representative workload scenarios with batch and
latency-critical applications. We show that either
approach is suboptimal for performance or cost. We then
present HCloud, a hybrid provisioning system that uses
both reserved and on-demand resources. HCloud
determines which jobs should be mapped to reserved
                 versus on-demand resources based on overall load and
resource unpredictability. It also determines the
optimal instance size an application needs to satisfy
its Quality of Service (QoS) constraints. We
demonstrate that hybrid configurations improve
performance by 2.1x compared to fully on-demand
provisioning, and reduce cost by 46\% compared to fully
reserved systems. We also show that hybrid strategies
are robust to variation in system and job parameters,
such as cost and system load.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Yu:2016:CWM,
author = "Xiao Yu and Pallavi Joshi and Jianwu Xu and Guoliang
Jin and Hui Zhang and Guofei Jiang",
title = "{CloudSeer}: Workflow Monitoring of Cloud
Infrastructures via Interleaved Logs",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "489--502",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872407",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cloud infrastructures provide a rich set of management
tasks that operate computing, storage, and networking
resources in the cloud. Monitoring the executions of
these tasks is crucial for cloud providers to promptly
find and understand problems that compromise cloud
availability. However, such monitoring is challenging
because there are multiple distributed service
components involved in the executions. CloudSeer
enables effective workflow monitoring. It takes a
                 lightweight, non-intrusive approach that works purely
                 on the interleaved logs widely available in cloud
                 infrastructures. CloudSeer first builds an automaton
for the workflow of each management task based on
normal executions, and then it checks log messages
against a set of automata for workflow divergences in a
streaming manner. Divergences found during the checking
process indicate potential execution problems, which
may or may not be accompanied by error log messages.
For each potential problem, CloudSeer outputs necessary
context information including the affected task
automaton and related log messages hinting where the
problem occurs to help further diagnosis. Our
experiments on OpenStack, a popular open-source cloud
infrastructure, show that CloudSeer's efficiency and
problem-detection capability are suitable for online
monitoring.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Kwon:2016:LCI,
author = "Yonghwi Kwon and Dohyeong Kim and William Nick Sumner
and Kyungtae Kim and Brendan Saltaformaggio and Xiangyu
Zhang and Dongyan Xu",
title = "{LDX}: Causality Inference by Lightweight Dual
Execution",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "503--515",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872395",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Causality inference, such as dynamic taint analysis,
has many applications (e.g., information leak
detection). It determines whether an event e is
causally dependent on a preceding event c during
execution. We develop a new causality inference engine
LDX. Given an execution, it spawns a slave execution,
in which it mutates c and observes whether any change
is induced at e. To preclude non-determinism, LDX
couples the executions by sharing syscall outcomes. To
handle path differences induced by the perturbation, we
develop a novel on-the-fly execution alignment scheme
that maintains a counter to reflect the progress of
execution. The scheme relies on program analysis and
compiler transformation. LDX can effectively detect
                 information leaks and security attacks with an average
overhead of 6.08\% while running the master and the
slave concurrently on separate CPUs, much lower than
existing systems that require instruction level
monitoring. Furthermore, it has much better accuracy in
causality inference.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Leesatapornwongsa:2016:TTN,
author = "Tanakorn Leesatapornwongsa and Jeffrey F. Lukman and
Shan Lu and Haryadi S. Gunawi",
title = "{TaxDC}: a Taxonomy of Non-Deterministic Concurrency
Bugs in Datacenter Distributed Systems",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "517--530",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872374",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present TaxDC, the largest and most comprehensive
taxonomy of non-deterministic concurrency bugs in
distributed systems. We study 104 distributed
concurrency (DC) bugs from four widely-deployed
cloud-scale datacenter distributed systems, Cassandra,
Hadoop MapReduce, HBase and ZooKeeper. We study DC-bug
characteristics along several axes of analysis such as
the triggering timing condition and input
preconditions, error and failure symptoms, and fix
strategies, collectively stored as 2,083 classification
                 labels in the TaxDC database. We discuss how our study can
open up many new research directions in combating DC
bugs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Mao:2016:RFR,
author = "Junjie Mao and Yu Chen and Qixue Xiao and Yuanchun
Shi",
title = "{RID}: Finding Reference Count Bugs with Inconsistent
Path Pair Checking",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "531--544",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872389",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reference counts are widely used in OS kernels for
resource management. However, reference counts are not
                 trivial to use correctly in large-scale programs
because it is left to developers to make sure that an
increment to a reference count is always paired with a
decrement. This paper proposes inconsistent path pair
checking, a novel technique that can statically
discover bugs related to reference counts without
knowing how reference counts should be changed in a
function. A prototype called RID is implemented and
                 evaluations show that RID can discover more than 80
                 bugs in the latest Linux kernel that were confirmed by
                 the developers. The results also show that RID tends
                 to reveal bugs caused by developers' misunderstanding
                 of API specifications or error conditions that are not
                 handled properly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Zhang:2016:MPU,
author = "Huazhe Zhang and Henry Hoffmann",
title = "Maximizing Performance Under a Power Cap: a Comparison
of Hardware, Software, and Hybrid Techniques",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "545--559",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872375",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Power and thermal dissipation constrain multicore
performance scaling. Modern processors are built such
that they could sustain damaging levels of power
dissipation, creating a need for systems that can
implement processor power caps. A particular challenge
is developing systems that can maximize performance
within a power cap, and approaches have been proposed
in both software and hardware. Software approaches are
flexible, allowing multiple hardware resources to be
coordinated for maximum performance, but software is
slow, requiring a long time to converge to the power
target. In contrast, hardware power capping quickly
                 converges to the power cap, but only manages
voltage and frequency, limiting its potential
performance. In this work we propose PUPiL, a hybrid
software/hardware power capping system. Unlike previous
approaches, PUPiL combines hardware's fast reaction
time with software's flexibility. We implement PUPiL on
                 a real Linux/x86 platform and compare it to Intel's
commercial hardware power capping system for both
single and multi-application workloads. We find PUPiL
provides the same reaction time as Intel's hardware
with significantly higher performance. On average,
                 PUPiL outperforms hardware by 1.18x to 2.4x depending
on workload and power target. Thus, PUPiL provides a
promising way to enforce power caps with greater
performance than current state-of-the-art hardware-only
approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Fan:2016:CSG,
author = "Songchun Fan and Seyed Majid Zahedi and Benjamin C.
Lee",
title = "The Computational Sprinting Game",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "561--575",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872383",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computational sprinting is a class of mechanisms that
boost performance but dissipate additional power. We
describe a sprinting architecture in which many,
independent chip multiprocessors share a power supply
and sprints are constrained by the chips' thermal
limits and the rack's power limits. Moreover, we
present the computational sprinting game, a multi-agent
perspective on managing sprints. Strategic agents
decide whether to sprint based on application phases
and system conditions. The game produces an equilibrium
that improves task throughput for data analytics
workloads by 4-6$ \times $ over prior greedy heuristics
and performs within 90\% of an upper bound on
throughput from a globally optimized policy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Colin:2016:EIF,
author = "Alexei Colin and Graham Harvey and Brandon Lucia and
Alanson P. Sample",
title = "An Energy-interference-free Hardware-Software Debugger
for Intermittent Energy-harvesting Systems",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "577--589",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872409",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy-autonomous computing devices have the potential
to extend the reach of computing to a scale beyond
either wired or battery-powered systems. However, these
devices pose a unique set of challenges to application
developers who lack both hardware and software support
tools. Energy harvesting devices experience power
intermittence which causes the system to reset and
power-cycle unpredictably, tens to hundreds of times
per second. This can result in code execution errors
that are not possible in continuously-powered systems
and cannot be diagnosed with conventional debugging
tools such as JTAG and/or oscilloscopes. We propose the
Energy-interference-free Debugger, a hardware and
software platform for monitoring and debugging
                 intermittent systems without adversely affecting their
energy state. The Energy-interference-free Debugger
re-creates a familiar debugging environment for
intermittent software and augments it with debugging
primitives for effective diagnosis of intermittence
bugs. Our evaluation of the Energy-interference-free
Debugger quantifies its energy-interference-freedom and
shows its value in a set of debugging tasks in complex
test programs and several real applications, including
RFID code and a machine-learning-based activity
recognition system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Witchel:2016:PPW,
author = "Emmett Witchel",
title = "Programmer Productivity in a World of Mushy
Interfaces: Challenges of the Post-{ISA} Reality",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "591--591",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2876511",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Since 1964, we had the notion that the instruction set
architecture (ISA) is a useful and fairly opaque
abstraction layer between hardware and software.
Software rode hardware's performance wave while
remaining gloriously oblivious to hardware's growing
complexity. Unfortunately, the jig is up. We still have
ISAs, but the abstraction no longer offers seamless
portability---parallel software needs to be tuned for
different core counts, and heterogeneous processing
elements (CPUs, GPUs, accelerators) further complicate
programmability. We are better at building large-scale
heterogeneous processors than we are at programming
them. Maintaining software across multiple current
                 platforms is difficult, and porting to future platforms
is also difficult. There have been many technical
responses: virtual ISAs (e.g., NVIDIA's PTX),
higher-level programming interfaces (e.g., CUDA or
OpenCL), and late-stage compilation and
                 platform-specific tailoring (e.g., Android ART). A
                 team of opinionated experts, drawn from the three
                 ASPLOS communities, will examine the problem of
programmer productivity in the post-ISA world, first
from the perspective of their area of expertise and
then noting the contributions from the other two
communities. What research will save us and how? This
wide-ranging debate will frame important research areas
for future work while being grounded in frank
discussion about what has succeeded in the past.
Attendees can expect actionable insight into important
                 research issues as well as an entertaining discussion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Angstadt:2016:RPP,
author = "Kevin Angstadt and Westley Weimer and Kevin Skadron",
title = "{RAPID} Programming of Pattern-Recognition
Processors",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "593--605",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872393",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present RAPID, a high-level programming language
and combined imperative and declarative model for
programming pattern-recognition processors, such as
Micron's Automata Processor (AP). The AP is a novel,
                 non-von Neumann architecture for direct execution of
non-deterministic finite automata (NFAs), and has been
demonstrated to provide substantial speedup for a
variety of data-processing applications. RAPID is
clear, maintainable, concise, and efficient both at
compile and run time. Language features, such as code
abstraction and parallel control structures, map well
to pattern-matching problems, providing clarity and
maintainability. For generation of efficient runtime
code, we present algorithms to convert RAPID programs
into finite automata. Further, we introduce a
tessellation technique for configuring the AP, which
significantly reduces compile time, increases
programmer productivity, and improves maintainability.
We evaluate five RAPID programs against custom,
baseline implementations previously demonstrated to be
significantly accelerated by the AP. We find that RAPID
programs are much shorter in length, are expressible at
a higher level of abstraction than their handcrafted
counterparts, and yield generated code that is often
more compact. In addition, our tessellation technique
for configuring the AP has comparable device
utilization to, and results in compilation that is up
to four orders of magnitude faster than, current
solutions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Sui:2016:PCA,
author = "Xin Sui and Andrew Lenharth and Donald S. Fussell and
Keshav Pingali",
title = "Proactive Control of Approximate Programs",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "607--621",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872402",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Approximate computing trades off accuracy of results
for resources such as energy or computing time. There
is a large and rapidly growing literature on
approximate computing that has focused mostly on
showing the benefits of approximate computing. However,
we know relatively little about how to control
approximation in a disciplined way. In this paper, we
address the problem of controlling approximation for
non-streaming programs that have a set of ``knobs''
that can be dialed up or down to control the level of
approximation of different components in the program.
We formulate this control problem as a constrained
optimization problem, and describe a system called
Capri that uses machine learning to learn cost and
error models for the program, and uses these models to
determine, for a desired level of approximation, knob
settings that optimize metrics such as running time or
energy usage. Experimental results with complex
benchmarks from different problem domains demonstrate
the effectiveness of this approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Park:2016:ATC,
author = "Jongse Park and Emmanuel Amaro and Divya Mahajan and
Bradley Thwaites and Hadi Esmaeilzadeh",
title = "{AxGames}: Towards Crowdsourcing Quality Target
Determination in Approximate Computing",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "623--636",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872376",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Approximate computing trades quality of application
output for higher efficiency and performance.
Approximation is useful only if its impact on
application output quality is acceptable to the users.
However, there is a lack of systematic solutions and
studies that explore users' perspective on the effects
of approximation. In this paper, we seek to provide one
such solution for the developers to probe and discover
the boundary of quality loss that most users will deem
acceptable. We propose AxGames, a crowdsourced solution
that enables developers to readily infer a statistical
common ground from the general public through three
entertaining games. The users engage in these games by
betting on their opinion about the quality loss of the
final output while the AxGames framework collects
statistics about their perceptions. The framework then
statistically analyzes the results to determine the
acceptable levels of quality for a pair of
(application, approximation technique). The three games
are designed such that they effectively capture quality
requirements with various tradeoffs and contexts. To
evaluate AxGames, we examine seven diverse applications
that produce user perceptible outputs and cover a wide
range of domains, including image processing, optical
character recognition, speech to text conversion, and
audio processing. We recruit 700 participants/users
through Amazon's Mechanical Turk to play the games that
collect statistics about their perception on different
levels of quality. Subsequently, the AxGames framework
uses the Clopper-Pearson exact method, which computes a
binomial proportion confidence interval, to analyze the
collected statistics for each level of quality. Using
this analysis, AxGames can statistically project the
quality level that satisfies a given percentage of
users. The developers can use these statistical
projections to tune the level of approximation based on
the user experience. We find that the level of
acceptable quality loss significantly varies across
applications. For instance, to satisfy 90\% of users,
the level of acceptable quality loss is 2\% for one
application (image processing) and 26\% for another
(audio processing). Moreover, the pattern with which
the crowd responds to approximation takes significantly
different shape and form depending on the class of
applications. These results confirm the necessity of
solutions that systematically explore the effect of
approximation on the end user experience.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Bornholt:2016:DBA,
author = "James Bornholt and Randolph Lopez and Douglas M.
Carmean and Luis Ceze and Georg Seelig and Karin
Strauss",
title = "A {DNA}-Based Archival Storage System",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "637--649",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872397",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Demand for data storage is growing exponentially, but
the capacity of existing storage media is not keeping
up. Using DNA to archive data is an attractive
possibility because it is extremely dense, with a raw
                 limit of 1 exabyte/mm$^3$ (10$^9$ GB/mm$^3$), and
                 long-lasting, with an observed half-life of over 500
years. This paper presents an architecture for a
DNA-based archival storage system. It is structured as
a key-value store, and leverages common biochemical
techniques to provide random access. We also propose a
new encoding scheme that offers controllable
redundancy, trading off reliability for density. We
demonstrate feasibility, random access, and robustness
of the proposed encoding with wet lab experiments
involving 151 kB of synthesized DNA and a 42 kB
random-access subset, and simulation experiments of
larger sets calibrated to the wet lab experiments.
Finally, we highlight trends in biotechnology that
indicate the impending practicality of DNA storage for
much larger datasets.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Prabhakar:2016:GCH,
author = "Raghu Prabhakar and David Koeplinger and Kevin J.
Brown and HyoukJoong Lee and Christopher {De Sa} and
Christos Kozyrakis and Kunle Olukotun",
title = "Generating Configurable Hardware from Parallel
Patterns",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "651--665",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872415",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In recent years the computing landscape has seen an
increasing shift towards specialized accelerators.
Field programmable gate arrays (FPGAs) are particularly
promising for the implementation of these accelerators,
as they offer significant performance and energy
improvements over CPUs for a wide class of applications
and are far more flexible than fixed-function ASICs.
However, FPGAs are difficult to program. Traditional
programming models for reconfigurable logic use
low-level hardware description languages like Verilog
and VHDL, which have none of the productivity features
of modern software languages but produce very efficient
designs, and low-level software languages like C and
OpenCL coupled with high-level synthesis (HLS) tools
that typically produce designs that are far less
efficient. Functional languages with parallel patterns
are a better fit for hardware generation because they
provide high-level abstractions to programmers with
little experience in hardware design and avoid many of
the problems faced when generating hardware from
imperative languages. In this paper, we identify two
important optimizations for using parallel patterns to
generate efficient hardware: tiling and metapipelining.
We present a general representation of tiled parallel
patterns, and provide rules for automatically tiling
patterns and generating metapipelines. We demonstrate
experimentally that these optimizations result in
speedups up to 39.4$ \times $ on a set of benchmarks
from the data analytics domain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Chang:2016:DLD,
author = "Li-Wen Chang and Hee-Seok Kim and Wen-mei W. Hwu",
title = "{DySel}: Lightweight Dynamic Selection for
Kernel-based Data-parallel Programming Model",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "667--680",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872373",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The rising pressure for simultaneously improving
performance and reducing power is driving more
diversity into all aspects of computing devices. An
algorithm that is well-matched to the target hardware
can run multiple times faster and more energy
efficiently than one that is not. The problem is
complicated by the fact that a program's input also
affects the appropriate choice of algorithm. As a
result, software developers have been faced with the
challenge of determining the appropriate algorithm for
each potential combination of target device and data.
This paper presents DySel, a novel runtime system for
automating such determination for kernel-based data
parallel programming models such as OpenCL, CUDA,
OpenACC, and C++AMP. These programming models cover
many applications that demand high performance in
mobile, cloud and high-performance computing. DySel
systematically deploys candidate kernels on a small
portion of the actual data to determine which achieves
the best performance for the hardware-data combination.
The test-deployment, referred to as micro-profiling,
contributes to the final execution result and incurs
                 less than 8\% overhead in the worst observed case
when compared to an oracle. We show four major use
cases where DySel provides significantly more
consistent performance without tedious effort from the
developer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Chen:2016:BQA,
author = "Quan Chen and Hailong Yang and Jason Mars and Lingjia
Tang",
title = "{Baymax}: {QoS} Awareness and Increased Utilization
for Non-Preemptive Accelerators in Warehouse Scale
Computers",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "681--696",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872368",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern warehouse-scale computers (WSCs) are being
outfitted with accelerators to provide the significant
compute required by emerging intelligent personal
assistant (IPA) workloads such as voice recognition,
image classification, and natural language processing.
It is well known that the diurnal user access pattern
of user-facing services provides a strong incentive to
co-locate applications for better accelerator
utilization and efficiency, and prior work has focused
on enabling co-location on multicore processors.
However, interference when co-locating applications on
non-preemptive accelerators is fundamentally different
than contention on multi-core CPUs and introduces a new
set of challenges to reduce QoS violation. To address
this open problem, we first identify the underlying
causes for QoS violation in accelerator-outfitted
servers. Our experiments show that queuing delay for
the compute resources and PCI-e bandwidth contention
                 for data transfer are the two main factors that
contribute to the long tails of user-facing
applications. We then present Baymax, a runtime system
that orchestrates the execution of compute tasks from
different applications and mitigates PCI-e bandwidth
contention to deliver the required QoS for user-facing
applications and increase the accelerator utilization.
Using DjiNN, a deep neural network service, Sirius, an
end-to-end IPA workload, and traditional applications
                 on an Nvidia K40 GPU, our evaluation shows that Baymax
improves the accelerator utilization by 91.3\% while
                 achieving the desired 99\%-ile latency target for
user-facing applications. In fact, Baymax reduces the
99\%-ile latency of user-facing applications by up to
195x over default execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Nowatzki:2016:ABS,
author = "Tony Nowatzki and Karthikeyan Sankaralingam",
title = "Analyzing Behavior Specialized Acceleration",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "697--711",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872412",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hardware specialization has become a promising
paradigm for overcoming the inefficiencies of general
purpose microprocessors. Of significant interest are
Behavioral Specialized Accelerators (BSAs), which are
designed to efficiently execute code with only certain
properties, but remain largely configurable or
programmable. The most important strength of BSAs ---
their ability to target a wide variety of codes ---
also makes their interactions and analysis complex,
raising the following questions: can multiple BSAs be
composed synergistically, what are their interactions
with the general purpose core, and what combinations
favor which workloads? From a methodological
standpoint, BSAs are also challenging, as they each
require ISA development, compiler and assembler
extensions, and either simulator or RTL models. To
study the potential of BSAs, we propose a novel
modeling technique called the Transformable Dependence
Graph (TDG) --- a higher level alternative to the
time-consuming traditional compiler+simulator approach,
while still enabling detailed microarchitectural models
for both general cores and accelerators. We then
propose a multi-BSA organization, called ExoCore, which
we model and study using the TDG. A design space
exploration reveals that an ExoCore organization can
push designs beyond the established energy-performance
frontiers for general purpose cores. For example, a
2-wide OOO processor with three BSAs matches the
performance of a conventional 6-wide OOO core, has 40\%
lower area, and is 2.6x more energy efficient.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Yoon:2016:PPI,
author = "Man-Ki Yoon and Negin Salajegheh and Yin Chen and
Mihai Christodorescu",
title = "{PIFT}: Predictive Information-Flow Tracking",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "713--725",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872403",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Phones today carry sensitive information and have a
great number of ways to communicate that data. As a
result, malware that steal money, information, or
simply disable functionality have hit the app stores.
Current security solutions for preventing undesirable
data leaks are mostly high-overhead and have not been
practical enough for smartphones. In this paper, we
                 show that by monitoring just some instructions
                 (only memory loads and stores), it is possible to
                 achieve low-overhead, highly accurate information-flow
tracking. Our method achieves 98\% accuracy (0\% false
positive and 2\% false negative) over DroidBench and
was able to successfully catch seven real-world malware
instances that steal phone number, location, and device
ID using SMS messages and HTTP connections.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Venkat:2016:HHI,
author = "Ashish Venkat and Sriskanda Shamasunder and Hovav
Shacham and Dean M. Tullsen",
title = "{HIPStR}: Heterogeneous-{ISA} Program State
Relocation",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "727--741",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872408",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Heterogeneous Chip Multiprocessors have been shown to
provide significant performance and energy efficiency
gains over homogeneous designs. Recent research has
expanded the dimensions of heterogeneity to include
diverse Instruction Set Architectures, called
Heterogeneous-ISA Chip Multiprocessors. This work
leverages such an architecture to realize substantial
new security benefits, and in particular, to thwart
Return-Oriented Programming. This paper proposes a
novel security defense called HIPStR ---
Heterogeneous-ISA Program State Relocation --- that
performs dynamic randomization of run-time program
state, both within and across ISAs. This technique
outperforms the state-of-the-art just-in-time code
reuse (JIT-ROP) defense by an average of 15.6\%, while
simultaneously providing greater security guarantees
against classic return-into-libc, ROP, JOP, brute
force, JIT-ROP, and several evasive variants.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Aweke:2016:ASB,
author = "Zelalem Birhanu Aweke and Salessawi Ferede Yitbarek
and Rui Qiao and Reetuparna Das and Matthew Hicks and
Yossi Oren and Todd Austin",
title = "{ANVIL}: Software-Based Protection Against
Next-Generation Rowhammer Attacks",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "743--755",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872390",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Ensuring the integrity and security of the memory
system is critical. Recent studies have shown serious
security concerns due to ``rowhammer'' attacks, where
repeated accesses to a row of memory cause bit flips in
adjacent rows. Recent work by Google's Project Zero has
shown how to leverage rowhammer-induced bit-flips as
the basis for security exploits that include malicious
code injection and memory privilege escalation. Being
an important security concern, industry has attempted
to defend against rowhammer attacks. Deployed defenses
employ two strategies: (1) doubling the system DRAM
refresh rate and (2) restricting access to the CLFLUSH
instruction that attackers use to bypass the cache to
increase memory access frequency (i.e., the rate of
rowhammering). We demonstrate that such defenses are
                 inadequate: we implement rowhammer attacks that both
avoid using the CLFLUSH instruction and cause bit flips
with a doubled refresh rate. Our next-generation
CLFLUSH-free rowhammer attack bypasses the cache by
manipulating cache replacement state to allow frequent
misses out of the last-level cache to DRAM rows of our
choosing. To protect existing systems from more
advanced rowhammer attacks, we develop a software-based
defense, ANVIL, which thwarts all known rowhammer
attacks on existing systems. ANVIL detects rowhammer
attacks by tracking the locality of DRAM accesses using
existing hardware performance counters. Our detector
identifies the rows being frequently accessed (i.e.,
the aggressors), then selectively refreshes the nearby
victim rows to prevent hammering. Experiments running
on real hardware with the SPEC2006 benchmarks show that
ANVIL has less than a 1\% false positive rate and an
average slowdown of 1\%. ANVIL is low-cost and robust,
and our experiments indicate that it is an effective
approach for protecting existing and future systems
from even advanced rowhammer attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Didona:2016:PAM,
author = "Diego Didona and Nuno Diegues and Anne-Marie Kermarrec
and Rachid Guerraoui and Ricardo Neves and Paolo
Romano",
title = "{ProteusTM}: Abstraction Meets Performance in
Transactional Memory",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "757--771",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872385",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Transactional Memory (TM) paradigm promises to
greatly simplify the development of concurrent
applications. This led, over the years, to the creation
of a plethora of TM implementations delivering wide
ranges of performance across workloads. Yet, no
universal implementation fits each and every workload.
                 In fact, the best TM for a given workload can prove
                 disastrous for another one. This forces developers
to face the complex task of tuning TM implementations,
which significantly hampers their wide adoption. In
this paper, we address the challenge of automatically
identifying the best TM implementation for a given
workload. Our proposed system, ProteusTM, hides behind
the TM interface a large library of implementations.
Underneath, it leverages a novel multi-dimensional
online optimization scheme, combining two popular
learning techniques: Collaborative Filtering and
Bayesian Optimization. We integrated ProteusTM in GCC
and demonstrate its ability to switch between TMs and
adapt several configuration parameters (e.g., number of
threads). We extensively evaluated ProteusTM, obtaining
                 average performance within 3\% of optimal, and gains up
to 100x over static alternatives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Shalev:2016:CCS,
author = "Noam Shalev and Eran Harpaz and Hagar Porat and Idit
Keidar and Yaron Weinsberg",
title = "{CSR}: Core Surprise Removal in Commodity Operating
Systems",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "773--787",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872369",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "One of the adverse effects of shrinking transistor
sizes is that processors have become increasingly prone
to hardware faults. At the same time, the number of
cores per die rises. Consequently, core failures can no
longer be ruled out, and future operating systems for
many-core machines will have to incorporate fault
tolerance mechanisms. We present CSR, a strategy for
recovery from unexpected permanent processor faults in
commodity operating systems. Our approach overcomes
surprise removal of faulty cores, and also tolerates
cascading core failures. When a core fails in user
mode, CSR terminates the process executing on that core
and migrates the remaining processes in its run-queue
to other cores. We further show how hardware
transactional memory may be used to overcome failures
in critical kernel code. Our solution is scalable,
incurs low overhead, and is designed to integrate into
modern operating systems. We have implemented it in the
Linux kernel, using Haswell's Transactional
Synchronization Extension, and tested it on a real
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Gangwani:2016:CBS,
author = "Tanmay Gangwani and Adam Morrison and Josep
Torrellas",
title = "{CASPAR}: Breaking Serialization in Lock-Free
Multicore Synchronization",
journal = j-SIGPLAN,
volume = "51",
number = "4",
pages = "789--804",
month = apr,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2954679.2872400",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 9 17:13:59 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In multicores, performance-critical synchronization is
increasingly performed in a lock-free manner using
atomic instructions such as CAS or LL/SC. However, when
many processors synchronize on the same variable,
performance can still degrade significantly. Contending
writes get serialized, creating a non-scalable
condition. Past proposals that build hardware queues of
synchronizing processors do not fundamentally solve
this problem --- at best, they help to efficiently
serialize the contending writes. This paper proposes a
novel architecture that breaks the serialization of
hardware queues and enables the queued processors to
perform lock-free synchronization in parallel. The
architecture, called CASPAR, is able to (1) execute the
CASes in the queued-up processors in parallel through
eager forwarding of expected values, and (2) validate
the CASes in parallel and dequeue groups of processors
at a time. The result is highly-scalable
synchronization. We evaluate CASPAR with simulations of
a 64-core chip. Compared to existing proposals with
hardware queues, CASPAR improves the throughput of
kernels by 32\% on average, and reduces the execution
time of the sections considered in lock-free versions
of applications by 47\% on average. This makes these
sections 2.5x faster than in the original
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '16 conference proceedings.",
}
@Article{Spink:2016:EAI,
author = "Tom Spink and Harry Wagstaff and Bj{\"o}rn Franke",
title = "Efficient asynchronous interrupt handling in a
full-system instruction set simulator",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "1--10",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907953",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Instruction set simulators (ISS) have many uses in
embedded software and hardware development and are
typically based on dynamic binary translation (DBT),
where frequently executed regions of guest instructions
are compiled into host instructions using a
just-in-time (JIT) compiler. Full-system simulation,
which necessitates handling of asynchronous interrupts
from e.g. timers and I/O devices, complicates matters
as control flow is interrupted unpredictably and
diverted from the current region of code. In this paper
                 we present a novel scheme for handling asynchronous
interrupts, which integrates seamlessly into a
region-based dynamic binary translator. We first show
that our scheme is correct, i.e. interrupt handling is
not deferred indefinitely, even in the presence of code
regions comprising control flow loops. We demonstrate
                 that our new interrupt handling scheme is efficient, as
we minimise the number of inserted checks. Interrupt
handlers are also presented to the JIT compiler and
compiled to native code, further enhancing the
performance of our system. We have evaluated our scheme
in an ARM simulator using a region-based JIT
compilation strategy. We demonstrate that our solution
reduces the number of dynamic interrupt checks by 73\%,
                 reduces interrupt service latency by 26\%, and improves
throughput of an I/O bound workload by 7\%, over
traditional per-block schemes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
@Article{Robinson:2016:CCM,
author = "Forrest J. Robinson and Michael R. Jantz and Prasad A.
Kulkarni",
title = "Code cache management in managed language {VMs} to
reduce memory consumption for embedded systems",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "11--20",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907958",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "The compiled native code generated by a just-in-time
(JIT) compiler in managed language virtual machines
(VM) is placed in a region of memory called the code
cache. Code cache management (CCM) in a VM is
                 responsible for finding and evicting methods from the code
cache to maintain execution correctness and manage
program performance for a given code cache size or
memory budget. Effective CCM can also boost program
speed by enabling more aggressive JIT compilation,
powerful optimizations, and improved hardware
instruction cache and I-TLB performance. Though
important, CCM is an overlooked component in VMs. We
find that the default CCM policies in Oracle's
production-grade HotSpot VM perform poorly even at
modest memory pressure. We develop a detailed
simulation-based framework to model and evaluate the
potential efficiency of many different CCM policies in
a controlled and realistic, but VM-independent
environment. We make the encouraging discovery that
effective CCM policies can sustain high program
performance even for very small cache sizes. Our
simulation study provides the rationale and motivation
to improve CCM strategies in existing VMs. We implement
and study the properties of several CCM policies in
HotSpot. We find that in spite of working within the
bounds of the HotSpot VM's current CCM sub-system, our
best CCM policy implementation in HotSpot improves
program performance over the default CCM algorithm by
39\%, 41\%, 55\%, and 50\% with code cache sizes that
are 90\%, 75\%, 50\%, and 25\% of the desired cache
size, on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
@Article{Nobre:2016:GBI,
author = "Ricardo Nobre and Luiz G. A. Martins and Jo{\~a}o M.
P. Cardoso",
title = "A graph-based iterative compiler pass selection and
phase ordering approach",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "21--30",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907959",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Nowadays compilers include tens or hundreds of
optimization passes, which makes it difficult to find
                 sequences of optimizations that produce compiled code
                 more optimized than that obtained using typical
compiler options such as -O2 and -O3. The problem
involves both the selection of the compiler passes to
use and their ordering in the compilation pipeline. The
improvement achieved by the use of custom phase orders
for each function can be significant, and thus
important to satisfy strict requirements such as the
ones present in high-performance embedded computing
systems. In this paper we present a new and fast
iterative approach to the phase selection and ordering
challenges resulting in compiled code with higher
                 performance than that achieved with the standard
optimization levels of the LLVM compiler. The obtained
performance improvements are comparable with the ones
achieved by other iterative approaches while requiring
considerably less time and resources. Our approach is
based on sampling over a graph representing transitions
between compiler passes. We performed a number of
experiments targeting the LEON3 microarchitecture using
the Clang/LLVM 3.7 compiler, considering 140 LLVM
passes and a set of 42 representative signal and image
processing C functions. An exhaustive cross-validation
shows our new exploration method is able to achieve a
geometric mean performance speedup of 1.28x over the
best individually selected -OX flag when considering
                 100,000 iterations, versus geometric mean speedups from
1.16x to 1.25x obtained with state-of-the-art iterative
methods not using the graph. From the set of
exploration methods tested, our new method is the only
one consistently finding compiler sequences that result
in performance improvements when considering 100 or
                 fewer exploration iterations. Specifically, it achieved
geometric mean speedups of 1.08x and 1.16x for 10 and
100 iterations, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
@Article{Banerjee:2016:TVL,
author = "Kunal Banerjee and Chittaranjan Mandal and Dipankar
Sarkar",
title = "Translation validation of loop and arithmetic
transformations in the presence of recurrences",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "31--40",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907954",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compiler optimization of array-intensive programs
involves extensive application of loop transformations
and arithmetic transformations. Hence, translation
validation of array-intensive programs requires
manipulation of intervals of integers (representing
domains of array indices) and relations over such
intervals to account for loop transformations and
simplification of arithmetic expressions to handle
arithmetic transformations. A major obstacle for
verification of such programs is posed by the presence
of recurrences, whereby an element of an array gets
defined in a statement S inside a loop in terms of some
other element(s) of the same array which have been
previously defined through the same statement S.
Recurrences lead to cycles in the data-dependence graph
of a program which make dependence analyses and
simplifications (through closed-form representations)
                 of the data transformations difficult. An alternative
                 technique that copes better with recurrences does not
                 handle arithmetic transformations. In this work, array
data-dependence graphs (ADDGs) are used to represent
both the original and the optimized versions of the
program and a validation scheme is proposed where the
cycles due to recurrences in the ADDGs are suitably
abstracted as acyclic subgraphs. Thus, this work
provides a unified equivalence checking framework to
handle loop and arithmetic transformations along with
most of the recurrences --- this combination of
features had not been achieved by a single verification
technique earlier.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
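
For concreteness, the kind of recurrence the abstract refers to can be
illustrated with a toy example (ours, not the paper's): statement S
defines a[i] in terms of a[i-1], an element previously defined by S
itself, so the array data-dependence graph contains a cycle.

def prefix_sums(b):
    a = [0] * len(b)
    a[0] = b[0]
    for i in range(1, len(b)):
        a[i] = a[i - 1] + b[i]   # statement S: a[i] depends on a[i-1]
    return a

print(prefix_sums([1, 2, 3]))    # [1, 3, 6]
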
@Article{Sui:2016:LOA,
author = "Yulei Sui and Xiaokang Fan and Hao Zhou and Jingling
Xue",
title = "Loop-oriented array- and field-sensitive pointer
analysis for automatic {SIMD} vectorization",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "41--51",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907957",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compiler-based auto-vectorization is a promising
solution to automatically generate code that makes
efficient use of SIMD processors in high performance
platforms and embedded systems. Two main
auto-vectorization techniques, superword-level
parallelism vectorization (SLP) and loop-level
vectorization (LLV), require precise dependence
analysis on arrays and structs in order to vectorize
isomorphic scalar instructions and/or reduce dynamic
dependence checks incurred at runtime. The alias
analyses used in modern vectorizing compilers are
either intra-procedural (without tracking
inter-procedural data-flows) or inter-procedural (by
using field-insensitive models, which are too imprecise
in handling arrays and structs). This paper proposes an
inter-procedural Loop-oriented Pointer Analysis, called
LPA, for analyzing arrays and structs to support
aggressive SLP and LLV optimizations. Unlike
field-insensitive solutions that preallocate objects
for each memory allocation site, our approach uses a
fine-grained memory model to generate location sets
based on how structs and arrays are accessed. LPA can
precisely analyze arrays and nested aggregate
structures to enable SIMD optimizations for large
programs. By separating the location set generation as
an independent concern from the rest of the pointer
                 analysis, LPA is designed to easily reuse existing
points-to resolution algorithms. We evaluate LPA using
                 SLP and LLV, the two classic vectorization techniques,
                 on a set of 20 CPU2000/2006 benchmarks. For SLP, LPA
enables it to vectorize a total of 133 more basic
blocks, with an average of 12.09 per benchmark,
resulting in the best speedup of 2.95\% for 173.applu.
For LLV, LPA has reduced a total of 319 static bound
checks, with an average of 22.79 per benchmark,
resulting in the best speedup of 7.18\% for 177.mesa.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
@Article{Domagala:2016:GCT,
author = "Lukasz Domagala and Duco van Amstel and Fabrice
Rastello",
title = "Generalized cache tiling for dataflow programs",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "52--61",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907960",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The dataflow programming paradigm has facilitated the
expression of a great number of algorithmic
applications on embedded platforms in a wide variety of
                 applicative domains. Whether in a Domain-Specific
                 Language (DSL) or a more general-purpose one, the
                 dataflow paradigm lets one intuitively state the
                 successive steps of an algorithm and link them through
                 data communications. The optimization of cache memory in
this context has been a subject of interest since the
early '90s as the reuse and communication of data
between the agents of a dataflow program is a key
factor in achieving a high-performance implementation
within the reduced limits of embedded architectures. In
                 order to improve data reuse among the dataflow agents,
                 we propose a model of the communications and
data usage within a dataflow program. Aside from
providing an estimate of the amount of cache-misses
that a given scheduling generates, this model allows us
to specify the associated optimization problem in a
manner that is identical to loop-nest tiling. Improving
on the existing state-of-the-art methods we extend our
tiling technique to include non-uniform dependencies on
one of the dimensions of the iteration space. When
applying the proposed technique to dataflow programs
expressed within the StreamIt framework we are able to
showcase significant reductions in the number of
cache-misses for a majority of test-cases when compared
to existing optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
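
One generic way to estimate the cache misses a given schedule
generates, in the spirit of the model above, is to replay the
data-access trace the schedule induces against an LRU cache model. A
minimal Python sketch, assuming a fully associative LRU cache of
`capacity` lines (the paper's model is more detailed):

from collections import OrderedDict

def lru_misses(trace, capacity):
    """Count misses of an access trace on a fully associative LRU cache."""
    cache, misses = OrderedDict(), 0
    for item in trace:
        if item in cache:
            cache.move_to_end(item)        # hit: refresh recency
        else:
            misses += 1
            if len(cache) >= capacity:
                cache.popitem(last=False)  # evict least recently used
            cache[item] = True
    return misses

print(lru_misses([1, 2, 1, 3, 1, 2], capacity=2))  # -> 4

Comparing two schedules of the same dataflow program then reduces to
comparing the miss counts of their induced traces.
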
@Article{Chu:2016:SEM,
author = "Duc-Hiep Chu and Joxan Jaffar and Rasool Maghareh",
title = "Symbolic execution for memory consumption analysis",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "62--71",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907955",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the advances in both hardware and software of
embedded systems in the past few years, dynamic memory
allocation can now be safely used in embedded software.
As a result, the need to develop methods to avoid heap
overflow errors in safety-critical embedded systems has
increased. Resource analysis of imperative programs
with non-regular loop patterns and signed integers, to
support both memory allocation and deallocation, has
long been an open problem. Existing methods can
generate symbolic bounds that are parametric w.r.t. the
program inputs; such bounds, however, are imprecise in
the presence of non-regular loop patterns. In this
paper, we present a worst-case memory consumption
analysis, based upon the framework of symbolic
execution. Our assumption is that loops (and
recursions) of to-be-analyzed programs are indeed
                 bounded. We can then exhaustively unroll loops, and the
memory consumption of each iteration can be precisely
computed and summarized for aggregation. Because of
path-sensitivity, our algorithm generates more precise
bounds. Importantly, we demonstrate that by introducing
a new concept of reuse, symbolic execution scales to a
set of realistic benchmark programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
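
The aggregation step can be sketched as follows: once loops are bounded
and unrolled, worst-case memory consumption is the maximum peak over
all paths. A minimal Python sketch over a toy IR invented here (nested
lists of alloc/free/branch nodes), not the paper's symbolic-execution
engine:

def peak_memory(prog, cur=0, peak=0):
    """Worst-case peak over all paths of a loop-free toy program."""
    if not prog:
        return peak
    op, rest = prog[0], prog[1:]
    if op[0] == "alloc":
        return peak_memory(rest, cur + op[1], max(peak, cur + op[1]))
    if op[0] == "free":
        return peak_memory(rest, cur - op[1], peak)
    # ("branch", left, right): take the worse of the two paths
    return max(peak_memory(op[1] + rest, cur, peak),
               peak_memory(op[2] + rest, cur, peak))

prog = [("alloc", 2),
        ("branch", [("alloc", 8)], [("alloc", 4), ("free", 4)])]
print(peak_memory(prog))   # -> 10
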
@Article{Metta:2016:TSM,
author = "Ravindra Metta and Martin Becker and Prasad Bokil and
Samarjit Chakraborty and R. Venkatesh",
title = "{TIC}: a scalable model checking based approach to
{WCET} estimation",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "72--81",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907961",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The application of Model Checking to compute WCET has
not been explored as much as Integer Linear Programming
(ILP), primarily because model checkers fail to scale
for complex programs. These programs have loops with
large or unknown bounds, leading to a state space
explosion that model checkers cannot handle. To
overcome this, we have developed a technique, TIC, that
employs slicing, loop acceleration and
over-approximation on time-annotated source code,
enabling Model Checking to scale better for WCET
computation. Further, our approach is parametric, so
that the user can make a trade-off between the
                 tightness of the WCET estimate and the analysis time. We
conducted experiments on the M{\"a}lardalen benchmarks
to evaluate the effect of various abstractions on the
WCET estimate and analysis time. Additionally, we
compared our estimates to those made by an ILP-based
analyzer and found that our estimates were tighter for
more than 30\% of the examples and were equal for the
rest.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
@Article{Chen:2016:CIM,
author = "Kuan-Hsun Chen and Bj{\"o}rn B{\"o}nninghoff and
Jian-Jia Chen and Peter Marwedel",
title = "Compensate or ignore? {Meeting} control robustness
requirements through adaptive soft-error handling",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "82--91",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907952",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To avoid catastrophic events like unrecoverable system
failures on mobile and embedded systems caused by
soft-errors, software-based error detection and
compensation techniques have been proposed. Methods
like error-correction codes or redundant execution can
offer high flexibility and allow for
application-specific fault-tolerance selection without
                 the need for special hardware support. However, such
software-based approaches may lead to system overload
due to the execution time overhead. An adaptive
deployment of such techniques to meet both application
requirements and system constraints is desired. From
our case study, we observe that a control task can
tolerate limited errors with acceptable performance
                 loss. Such tolerance can be modeled as an (m,k)
                 constraint, which requires at least m out of any k
                 consecutive runs to be correct. In this paper,
we discuss how a given (m,k) constraint can be
satisfied by adopting patterns of task instances with
individual error detection and compensation
capabilities. We introduce static strategies and
provide a formal feasibility analysis for validation.
Furthermore, we develop an adaptive scheme that extends
our initial approach with online awareness that
increases efficiency while preserving analysis results.
The effectiveness of our method is shown in a
real-world case study as well as for synthesized task
sets.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
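
A minimal Python sketch of the feasibility check for a static
protection pattern, assuming pattern[i] is True when task instance i
runs with error detection and compensation (and is therefore guaranteed
correct) and that the pattern repeats cyclically; the paper's analysis
is more general:

def satisfies_mk(pattern, m, k):
    """Every window of k consecutive runs has >= m protected runs."""
    n = len(pattern)
    return all(sum(pattern[(i + j) % n] for j in range(k)) >= m
               for i in range(n))

print(satisfies_mk([True, False, True, False], m=1, k=2))  # -> True
print(satisfies_mk([True, True, False, False], m=1, k=2))  # -> False
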
@Article{Chakraborty:2016:OCP,
author = "Prasenjit Chakraborty and Gautam Doshi and Shashank
Shekhar and Vikrant Kumar",
title = "Opportunity for compute partitioning in pursuit of
energy-efficient systems",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "92--101",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907956",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Performance of computing systems, from handhelds to
supercomputers, is increasingly constrained by the
energy consumed. A significant and increasing fraction
of the energy is consumed in the movement of data. In a
compute node, caches have been very effective in
reducing data movement by exploiting the available data
locality in programs. Program regions with poor data
                 locality then cause most of the data movement, and
consequently consume an ever larger fraction of energy.
In this paper we explore the energy efficiency
opportunity of minimizing the data movement in
precisely such program regions, by first imagining the
possibility of compute near memory, and then
partitioning the program's execution between a compute
core and the compute near memory (CnM). Due to the
emergence of 3D stacked memory, a CnM implementation
appears more realistic. Our focus is on evaluating the
partitioning opportunity in applications and to do a
limit study of systems enabled with CnM capabilities to
understand and guide their architectural embodiment. We
describe an automated method of analyzing the data
access pattern of optimized workload binaries, via a
binary-instrumentation tool called SnapCnM, to identify
the beneficial program regions (loops) for CnM
                 execution. We also perform a limit study to evaluate the
impact of such partitioning over a range of parameters
affecting CnM design choices. Our results show that
                 compute partitioning a small ({$<$}10\%) fraction of a
workload can improve its energy efficiency from 3\%
(for compute-bound applications) to 27\% (for
memory-bound applications). From the study in this work
we discuss the important aspects that help to shape the
future CnM design space.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
@Article{Phothilimthana:2016:CGR,
author = "Phitchaya Mangpo Phothilimthana and Michael Schuldt
and Rastislav Bodik",
title = "Compiling a gesture recognition application for a
low-power spatial architecture",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "102--112",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907962",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy efficiency is one of the main performance goals
when designing processors for embedded systems.
Typically, the simpler the processor, the less energy
it consumes. Thus, an ultra-low power multicore
                 processor will likely have very small distributed
memory with a simple interconnect. To compile for such
an architecture, a partitioning strategy that can tune
between space and communication minimization is crucial
to fit a program in its limited resources and achieve
good performance. A careful program layout design is
                 also critical. Aside from fulfilling the space constraint, a
compiler needs to be able to optimize for program
latency to satisfy a certain timing requirement as
well. To satisfy all aforementioned constraints, we
present a flexible code partitioning strategy and
light-weight mechanisms to express parallelism and
program layout. First, we compare two strategies for
partitioning program structures and introduce a
language construct to let programmers choose which
strategies to use and when. The compiler then
partitions program structures with a mix of both
                 strategies. Second, we add support for
programmer-specified parallelism and program layout
through imposing additional spatial constraints to the
compiler. We evaluate our compiler by implementing an
accelerometer-based gesture recognition application on
GA144, a recent low-power minimalistic multicore
architecture. When compared to MSP430, GA144 is overall
19x more energy-efficient and 23x faster when running
this application. Without these inventions, this
application would not be able to fit on GA144.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
@Article{Micolet:2016:MLA,
author = "Paul-Jules Micolet and Aaron Smith and Christophe
Dubach",
title = "A machine learning approach to mapping streaming
workloads to dynamic multicore processors",
journal = j-SIGPLAN,
volume = "51",
number = "5",
pages = "113--122",
month = may,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980930.2907951",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:24 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dataflow programming languages facilitate the design
of data intensive programs such as streaming
applications commonly found in embedded systems. They
also expose parallelism that can be exploited using
multicore processors which are now part of the mobile
landscape. In recent years a shift has occurred towards
                 heterogeneity (e.g., ARM big.LITTLE) and reconfigurability.
Dynamic Multicore Processors (DMPs) bridge the gap
between fully reconfigurable processors and homogeneous
multicore systems. They can re-allocate their resources
                 at runtime to create larger, more powerful logical
processors fine-tuned to the workload. Unfortunately,
there exists no accurate method to determine how to
partition the cores in a DMP among application threads.
Often programmers rely on analyzing the application
                 manually and using a set of hand-picked heuristics.
This leads to sub-optimal performance, reducing the
potential of DMPs. What is needed is a way to determine
the optimal partitioning and grouping of resources to
maximize performance. As a first step, this paper
studies the effect of thread partitioning and hardware
resource allocation on a set of StreamIt applications.
We show that the resulting space is not trivial and
exhibits a large performance variation depending on the
combination of parameters. We introduce a
machine-learning based methodology to tackle the space
complexity. Our machine-learning model is able to
directly predict the best combination of parameters
using static code features. The predicted set of
parameters leads to performance on-par with the best
performance found in a space of more than 32,000
configurations per application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '16 conference proceedings.",
}
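
As a cartoon of the prediction step, mapping static code features to a
good core partitioning can be done with a nearest-neighbor lookup over
previously measured programs. A minimal Python sketch with invented
features and configurations (the paper's machine-learning model is more
sophisticated):

def predict_config(features, training):
    """training: list of (feature_vector, best_config) pairs."""
    def dist(a, b):
        return sum((x - y) ** 2 for x, y in zip(a, b))
    return min(training, key=lambda fx: dist(fx[0], features))[1]

training = [([12, 3, 0.8], (4, 2)),   # (threads, cores per thread)
            ([40, 9, 0.1], (2, 8))]
print(predict_config([35, 8, 0.2], training))  # -> (2, 8)
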
@Article{Memarian:2016:DCE,
author = "Kayvan Memarian and Justus Matthiesen and James
Lingard and Kyndylan Nienhuis and David Chisnall and
Robert N. M. Watson and Peter Sewell",
title = "Into the depths of {C}: elaborating the de facto
standards",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "1--15",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908081",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "C remains central to our computing infrastructure. It
is notionally defined by ISO standards, but in reality
the properties of C assumed by systems code and those
implemented by compilers have diverged, both from the
ISO standards and from each other, and none of these
are clearly understood. We make two contributions to
help improve this error-prone situation. First, we
describe an in-depth analysis of the design space for
the semantics of pointers and memory in C as it is used
in practice. We articulate many specific questions,
build a suite of semantic test cases, gather
experimental data from multiple implementations, and
survey what C experts believe about the de facto
standards. We identify questions where there is a
consensus (either following ISO or differing) and where
there are conflicts. We apply all this to an
experimental C implemented above capability hardware.
Second, we describe a formal model, Cerberus, for large
parts of C. Cerberus is parameterised on its memory
model; it is linkable either with a candidate de facto
memory object model, under construction, or with an
operational C11 concurrency model; it is defined by
elaboration to a much simpler Core language for
accessibility, and it is executable as a test oracle on
small examples. This should provide a solid basis for
discussion of what mainstream C is now: what
programmers and analysis tools can assume and what
compilers aim to implement. Ultimately we hope it will
be a step towards clear, consistent, and accepted
semantics for the various use-cases of C.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Chamith:2016:LER,
author = "Buddhika Chamith and Bo Joel Svensson and Luke
Dalessandro and Ryan R. Newton",
title = "Living on the edge: rapid-toggling probes with
cross-modification on x86",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "16--26",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908084",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic probe injection is now a widely used method to
debug performance in production. Current techniques for
dynamic probing of native code, however, rely on an
expensive stop-the-world approach: binary changes are
made within a safe state of the program --- typically
in which all the program threads are halted --- to
ensure that another thread executing the modified code
                 region doesn't step into partially modified code.
Stop-the-world patching is not scalable. In contrast,
low overhead, scalable probes that can be rapidly
toggled on and off in-place would open up new use cases
for statistical profilers and language implementations,
even traditional ahead-of-time, native-code compilers.
In this paper we introduce safe cross-modification
protocols that mutate x86 code between threads but do
not require quiescing threads, resulting in radically
lower overheads than existing solutions. A key problem
is handling instructions that straddle cache lines. We
empirically evaluate existing x86 architectures to
derive a safe policy given current processor behavior,
and we argue that future architectures should clarify
the semantics of instruction fetching to make cheap
cross-modification easier and future proof.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Noonan:2016:PTI,
author = "Matt Noonan and Alexey Loginov and David Cok",
title = "Polymorphic type inference for machine code",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "27--41",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908119",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For many compiled languages, source-level types are
erased very early in the compilation process. As a
result, further compiler passes may convert type-safe
source into type-unsafe machine code. Type-unsafe
idioms in the original source and type-unsafe
optimizations mean that type information in a stripped
binary is essentially nonexistent. The problem of
recovering high-level types by performing type
inference over stripped machine code is called type
reconstruction, and offers a useful capability in
support of reverse engineering and decompilation. In
this paper, we motivate and develop a novel type system
and algorithm for machine-code type inference. The
features of this type system were developed by
surveying a wide collection of common source- and
machine-code idioms, building a catalog of challenging
cases for type reconstruction. We found that these
idioms place a sophisticated set of requirements on the
type system, inducing features such as
recursively-constrained polymorphic types. Many of the
features we identify are often seen only in expressive
and powerful type systems used by high-level functional
languages. Using these type-system features as a
guideline, we have developed Retypd: a novel static
type-inference algorithm for machine code that supports
recursive types, polymorphism, and subtyping. Retypd
yields more accurate inferred types than existing
algorithms, while also enabling new capabilities such
as reconstruction of pointer const annotations with
98\% recall. Retypd can operate on weaker program
representations than the current state of the art,
removing the need for high-quality points-to
information that may be impractical to compute.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Padhi:2016:DDP,
author = "Saswat Padhi and Rahul Sharma and Todd Millstein",
title = "Data-driven precondition inference with learned
features",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "42--56",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908099",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We extend the data-driven approach to inferring
preconditions for code from a set of test executions.
Prior work requires a fixed set of features, atomic
predicates that define the search space of possible
preconditions, to be specified in advance. In contrast,
we introduce a technique for on-demand feature
learning, which automatically expands the search space
of candidate preconditions in a targeted manner as
necessary. We have instantiated our approach in a tool
called PIE. In addition to making precondition
inference more expressive, we show how to apply our
feature-learning technique to the setting of
data-driven loop invariant inference. We evaluate our
approach by using PIE to infer rich preconditions for
black-box OCaml library functions and using our
loop-invariant inference algorithm as part of an
automatic program verifier for C++ programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
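
A minimal Python sketch of the data-driven setting: search for a
conjunction of feature predicates that accepts all passing inputs and
rejects all failing ones; when none exists, a new feature must be
learned on demand (the learning step is the paper's contribution and is
only signalled here). The features and tests below are invented:

def infer_precondition(features, good, bad):
    # keep every feature that holds on all passing inputs
    conj = [f for f in features if all(f(x) for x in good)]
    # the conjunction must reject every failing input
    if all(any(not f(x) for f in conj) for x in bad):
        return conj
    return None   # feature set too weak: learn a new feature

features = [lambda x: x >= 0, lambda x: x % 2 == 0]
good, bad = [0, 4, 16], [-4, -16]
pre = infer_precondition(features, good, bad)
print([f(9) for f in pre])   # -> [True, False]: 9 fails the precondition
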
@Article{Sousa:2016:CHL,
author = "Marcelo Sousa and Isil Dillig",
title = "{Cartesian} {Hoare} logic for verifying $k$-safety
properties",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "57--69",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908092",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Unlike safety properties which require the absence of
a ``bad'' program trace, k-safety properties stipulate
the absence of a ``bad'' interaction between $k$
traces. Examples of $k$-safety properties include
transitivity, associativity, anti-symmetry, and
monotonicity. This paper presents a sound and
relatively complete calculus, called Cartesian Hoare
Logic (CHL), for verifying $k$-safety properties. We
also present an automated verification algorithm based
on CHL and implement it in a tool called DESCARTES. We
use DESCARTES to analyze user-defined relational
operators in Java and demonstrate that DESCARTES is
effective at verifying (or finding violations of)
multiple $k$-safety properties.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
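
For intuition, a k-safety property relates k runs of the same code. A
minimal Python sketch (ours) that tests, rather than verifies, one such
3-safety property: transitivity of equivalence for a comparator whose
"close enough" ties are not transitive:

from itertools import product

def close_compare(a, b):
    # treat values within 1 of each other as equal
    return 0 if abs(a - b) <= 1 else (a > b) - (a < b)

def equivalence_transitive(cmp, values):
    """Check cmp(x,y)==0 and cmp(y,z)==0 imply cmp(x,z)==0."""
    for x, y, z in product(values, repeat=3):
        if cmp(x, y) == 0 and cmp(y, z) == 0 and cmp(x, z) != 0:
            return False
    return True

print(equivalence_transitive(close_compare, range(5)))  # -> False
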
@Article{Lee:2016:VBM,
author = "Wonyeol Lee and Rahul Sharma and Alex Aiken",
title = "Verifying bit-manipulations of floating-point",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "70--84",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908107",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reasoning about floating-point is difficult and
becomes only more so if there is an interplay between
floating-point and bit-level operations. Even though
real-world floating-point libraries use implementations
that have such mixed computations, no systematic
technique to verify the correctness of the
implementations of such computations is known. In this
paper, we present the first general technique for
verifying the correctness of mixed binaries, which
combines abstraction, analytical optimization, and
testing. The technique provides a method to compute an
error bound of a given implementation with respect to
its mathematical specification. We apply our technique
to Intel's implementations of transcendental functions
and prove formal error bounds for these widely used
routines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Chen:2016:CDD,
author = "Yuting Chen and Ting Su and Chengnian Sun and Zhendong
Su and Jianjun Zhao",
title = "Coverage-directed differential testing of {JVM}
implementations",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "85--99",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908095",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Java virtual machine (JVM) is a core technology, whose
reliability is critical. Testing JVM implementations
requires painstaking effort in designing test
classfiles (*.class) along with their test oracles. An
alternative is to employ binary fuzzing to
differentially test JVMs by blindly mutating seeding
classfiles and then executing the resulting mutants on
different JVM binaries for revealing inconsistent
                 behaviors. However, this blind approach is not
                 cost-effective in practice because most of the mutants are
invalid and redundant. This paper tackles this
challenge by introducing classfuzz, a coverage-directed
fuzzing approach that focuses on representative
classfiles for differential testing of JVMs' startup
processes. Our core insight is to (1) mutate seeding
classfiles using a set of predefined mutation operators
(mutators) and employ Markov Chain Monte Carlo (MCMC)
sampling to guide mutator selection, and (2) execute
the mutants on a reference JVM implementation and use
coverage uniqueness as a discipline for accepting
representative ones. The accepted classfiles are used
as inputs to differentially test different JVM
implementations and find defects. We have implemented
classfuzz and conducted an extensive evaluation of it
against existing fuzz testing algorithms. Our
evaluation results show that classfuzz can enhance the
ratio of discrepancy-triggering classfiles from 1.7\%
to 11.9\%. We have also reported 62 JVM discrepancies,
along with the test classfiles, to JVM developers. Many
of our reported issues have already been confirmed as
JVM defects, and some even match recent clarifications
and changes to the Java SE 8 edition of the JVM
specification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
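
The MCMC-guided mutator selection can be sketched with a
Metropolis-Hastings step: mutators whose mutants historically exhibited
unique coverage get proportionally more future picks. A minimal Python
sketch with invented mutator names and scores:

import random

def mcmc_pick(mutators, score, current):
    """One Metropolis-Hastings step over the mutator set."""
    candidate = random.choice(mutators)          # symmetric proposal
    accept = min(1.0, score[candidate] / score[current])
    return candidate if random.random() < accept else current

score = {"drop_method": 3.0, "swap_bytes": 1.0, "dup_pool_entry": 2.0}
cur = "swap_bytes"
counts = {m: 0 for m in score}
for _ in range(10000):
    cur = mcmc_pick(list(score), score, cur)
    counts[cur] += 1
print(counts)   # roughly proportional to the scores, 3 : 1 : 2
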
@Article{Sorensen:2016:EER,
author = "Tyler Sorensen and Alastair F. Donaldson",
title = "Exposing errors related to weak memory in {GPU}
applications",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "100--113",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908114",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the systematic design of a testing
environment that uses stressing and fuzzing to reveal
errors in GPU applications that arise due to weak
memory effects. We evaluate our approach on seven GPUs
spanning three Nvidia architectures, across ten CUDA
applications that use fine-grained concurrency. Our
results show that applications that rarely or never
exhibit errors related to weak memory when executed
natively can readily exhibit these errors when executed
in our testing environment. Our testing environment
also provides a means to help identify the root causes
of such errors, and automatically suggests how to
insert fences that harden an application against weak
memory bugs. To understand the cost of GPU fences, we
benchmark applications with fences provided by the
hardening strategy as well as a more conservative,
sound fencing strategy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Faddegon:2016:LCT,
author = "Maarten Faddegon and Olaf Chitil",
title = "Lightweight computation tree tracing for lazy
functional languages",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "114--128",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908104",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A computation tree of a program execution describes
computations of functions and their dependencies. A
computation tree describes how a program works and is
at the heart of algorithmic debugging. To generate a
computation tree, existing algorithmic debuggers either
use a complex implementation or yield a less
informative approximation. We present a method for lazy
functional languages that requires only a simple
tracing library to generate a detailed computation
tree. With our algorithmic debugger a programmer can
debug any Haskell program by only importing our library
and annotating suspected functions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Hong:2016:EPM,
author = "Changwan Hong and Wenlei Bao and Albert Cohen and
Sriram Krishnamoorthy and Louis-No{\"e}l Pouchet and
Fabrice Rastello and J. Ramanujam and P. Sadayappan",
title = "Effective padding of multidimensional arrays to avoid
cache conflict misses",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "129--144",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908123",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Caches are used to significantly improve performance.
Even with high degrees of set associativity, the number
of accessed data elements mapping to the same set in a
cache can easily exceed the degree of associativity.
This can cause conflict misses and lower performance,
even if the working set is much smaller than cache
capacity. Array padding (increasing the size of array
dimensions) is a well-known optimization technique that
can reduce conflict misses. In this paper, we develop
the first algorithms for optimal padding of arrays
aimed at a set-associative cache for arbitrary tile
sizes. In addition, we develop the first solution to
padding for nested tiles and multi-level caches.
Experimental results with multiple benchmarks
demonstrate a significant performance improvement from
padding.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
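
To see why padding removes conflict misses, consider walking down one
column of a row-major array: successive accesses are one row apart, and
if the row size in cache lines is a multiple of the number of sets,
every access maps to the same set. A minimal Python sketch that
brute-force searches for a pad (unlike the paper's closed-form optimal
padding); cache parameters are in bytes:

def sets_touched(width_bytes, rows, sets, line):
    """Distinct cache sets hit when walking down column 0."""
    return len({(r * width_bytes // line) % sets for r in range(rows)})

def smallest_pad(width_bytes, rows, sets, line, elem):
    """Smallest per-row pad (in elements) spreading rows over sets."""
    target = min(rows, sets)
    for pad in range(sets):
        if sets_touched(width_bytes + pad * elem, rows, sets, line) >= target:
            return pad
    return 0

# 4096 doubles per row, 64-byte lines, 512 sets: all rows map to the
# same set until the row is padded.
print(sets_touched(4096 * 8, 16, 512, 64))       # -> 1 (conflicts)
print(smallest_pad(4096 * 8, 16, 512, 64, 8))    # -> 8 (one extra line)
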
@Article{Zhu:2016:GLE,
author = "Yuhao Zhu and Vijay Janapa Reddi",
title = "{GreenWeb}: language extensions for energy-efficient
mobile web computing",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "145--160",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908082",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Web computing is gradually shifting toward mobile
devices, in which the energy budget is severely
constrained. As a result, Web developers must be
conscious of energy efficiency. However, current Web
languages provide developers little control over energy
consumption. In this paper, we take a first step toward
language-level research to enable energy-efficient Web
computing. Our key motivation is that mobile systems
can wisely budget energy usage if informed with user
quality-of-service (QoS) constraints. To do this,
programmers need new abstractions. We propose two
language abstractions, QoS type and QoS target, to
capture two fundamental aspects of user QoS experience.
We then present GreenWeb, a set of language extensions
that empower developers to easily express the QoS
abstractions as program annotations. As a proof of
concept, we develop a GreenWeb runtime, which
intelligently determines how to deliver specified user
QoS expectation while minimizing energy consumption.
Overall, GreenWeb shows significant energy savings
                 (29.2\%--66.0\%) over Android's default Interactive
governor with few QoS violations. Our work demonstrates
a promising first step toward language innovations for
energy-efficient Web computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Laurenzano:2016:IRU,
author = "Michael A. Laurenzano and Parker Hill and Mehrzad
Samadi and Scott Mahlke and Jason Mars and Lingjia
Tang",
title = "Input responsiveness: using canary inputs to
dynamically steer approximation",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "161--176",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908087",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces Input Responsive Approximation
(IRA), an approach that uses a canary input --- a small
program input carefully constructed to capture the
intrinsic properties of the original input --- to
automatically control how program approximation is
applied on an input-by-input basis. Motivating this
approach is the observation that many of the prior
techniques focusing on choosing how to approximate
arrive at conservative decisions by discounting
substantial differences between inputs when applying
approximation. The main challenges in overcoming this
limitation lie in making the choice of how to
approximate both effectively (e.g., the fastest
approximation that meets a particular accuracy target)
and rapidly for every input. With IRA, each time the
approximate program is run, a canary input is
constructed and used dynamically to quickly test a
spectrum of approximation alternatives. Based on these
runtime tests, the approximation that best fits the
desired accuracy constraints is selected and applied to
the full input to produce an approximate result. We use
IRA to select and parameterize mixes of four
approximation techniques from the literature for a
range of 13 image processing, machine learning, and
data mining applications. Our results demonstrate that
IRA significantly outperforms prior approaches,
delivering an average of 10.2$ \times $ speedup over
exact execution while minimizing accuracy losses in
program outputs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
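
The selection loop at the heart of this approach can be sketched
simply: run each approximation variant on the canary, and pick the
fastest one whose error against the exact result stays within the
target. A minimal Python sketch with an invented kernel and a
loop-perforation-style approximation (canary construction itself is the
paper's contribution and is not modeled here):

import time

def pick_variant(variants, exact, canary, max_error):
    """Fastest variant meeting the error target on the canary input."""
    best, best_time = exact, None
    reference = exact(canary)
    for variant in variants:
        t0 = time.perf_counter()
        out = variant(canary)
        dt = time.perf_counter() - t0
        err = abs(out - reference) / max(abs(reference), 1e-9)
        if err <= max_error and (best_time is None or dt < best_time):
            best, best_time = variant, dt
    return best

def exact(xs):
    return sum(x * x for x in xs)

def skip_half(xs):                     # loop perforation: every 2nd item
    return 2 * sum(x * x for x in xs[::2])

canary = list(range(100))              # stand-in for a constructed canary
chosen = pick_variant([skip_half], exact, canary, max_error=0.05)
print(chosen(list(range(10**6))))      # approximate result on full input
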
@Article{Achour:2016:CSP,
author = "Sara Achour and Rahul Sarpeshkar and Martin C.
Rinard",
title = "Configuration synthesis for programmable analog
devices with {Arco}",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "177--193",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908116",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmable analog devices have emerged as a powerful
computing substrate for performing complex neuromorphic
and cytomorphic computations. We present Arco, a new
solver that, given a dynamical system specification in
the form of a set of differential equations, generates
physically realizable configurations for programmable
analog devices that are algebraically equivalent to the
specified system. On a set of benchmarks from the
biological domain, Arco generates configurations with
35 to 534 connections and 28 to 326 components in 1 to
54 minutes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Madsen:2016:DFD,
author = "Magnus Madsen and Ming-Ho Yee and Ondrej Lhot{\'a}k",
title = "From {Datalog} to {Flix}: a declarative language for
fixed points on lattices",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "194--208",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908096",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Flix, a declarative programming language
for specifying and solving least fixed point problems,
particularly static program analyses. Flix is inspired
by Datalog and extends it with lattices and monotone
functions. Using Flix, implementors of static analyses
can express a broader range of analyses than is
currently possible in pure Datalog, while retaining its
familiar rule-based syntax. We define a model-theoretic
semantics of Flix as a natural extension of the Datalog
semantics. This semantics captures the declarative
meaning of Flix programs without imposing any specific
evaluation strategy. An efficient strategy is
                 semi-naive evaluation, which we adapt for Flix. We have
implemented a compiler and runtime for Flix, and used
it to express several well-known static analyses,
including the IFDS and IDE algorithms. The declarative
nature of Flix clearly exposes the similarity between
these two algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
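
A minimal Python sketch of the least-fixed-point computation Flix
generalizes: a constant-propagation lattice (bot below constants below
top) with a monotone transfer function, iterated naively to a fixed
point (Flix uses semi-naive evaluation; the rules here are invented):

BOT, TOP = "bot", "top"

def join(a, b):
    if a == BOT: return b
    if b == BOT: return a
    return a if a == b else TOP

def plus(a, b):                      # monotone function on the lattice
    if BOT in (a, b): return BOT
    if TOP in (a, b): return TOP
    return a + b

# rules: x = 1;  y = 2;  z = x + y;  z = z + 0
def solve():
    env = {"x": BOT, "y": BOT, "z": BOT}
    changed = True
    while changed:
        old = dict(env)
        env["x"] = join(env["x"], 1)
        env["y"] = join(env["y"], 2)
        env["z"] = join(env["z"], plus(env["x"], env["y"]))
        env["z"] = join(env["z"], plus(env["z"], 0))
        changed = env != old
    return env

print(solve())   # -> {'x': 1, 'y': 2, 'z': 3}
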
@Article{Truong:2016:LLC,
author = "Leonard Truong and Rajkishore Barik and Ehsan Totoni
and Hai Liu and Chick Markley and Armando Fox and
Tatiana Shpeisman",
title = "{Latte}: a language, compiler, and runtime for elegant
and efficient deep neural networks",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "209--223",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908105",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Deep neural networks (DNNs) have undergone a surge in
popularity with consistent advances in the state of the
art for tasks including image recognition, natural
language processing, and speech recognition. The
computationally expensive nature of these networks has
led to the proliferation of implementations that
sacrifice abstraction for high performance. In this
paper, we present Latte, a domain-specific language for
DNNs that provides a natural abstraction for specifying
new layers without sacrificing performance. Users of
Latte express DNNs as ensembles of neurons with
connections between them. The Latte compiler
synthesizes a program based on the user specification,
applies a suite of domain-specific and general
optimizations, and emits efficient machine code for
heterogeneous architectures. Latte also includes a
communication runtime for distributed memory
                 data parallelism. Using networks written in Latte,
we demonstrate 3-6x speedup over Caffe (C++/MKL) on the
three state-of-the-art ImageNet models executing on an
Intel Xeon E5-2699 v3 x86 CPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Adams:2016:CPP,
author = "Michael D. Adams and Celeste Hollenbeck and Matthew
Might",
title = "On the complexity and performance of parsing with
derivatives",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "224--236",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908128",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Current algorithms for context-free parsing inflict a
trade-off between ease of understanding, ease of
implementation, theoretical complexity, and practical
performance. No algorithm achieves all of these
properties simultaneously. Might et al. introduced
parsing with derivatives, which handles arbitrary
context-free grammars while being both easy to
understand and simple to implement. Despite much
initial enthusiasm and a multitude of independent
implementations, its worst-case complexity has never
been proven to be better than exponential. In fact,
high-level arguments claiming it is fundamentally
exponential have been advanced and even accepted as
part of the folklore. Performance ended up being
sluggish in practice, and this sluggishness was taken
as informal evidence of exponentiality. In this paper,
we reexamine the performance of parsing with
derivatives. We have discovered that it is not
exponential but, in fact, cubic. Moreover, simple
(though perhaps not obvious) modifications to the
implementation by Might et al. lead to an
implementation that is not only easy to understand but
also highly performant in practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
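
The derivative idea is easy to state for regular languages: the
derivative of a language L with respect to a character c is the set of
strings w with cw in L, and a string matches iff the iterated
derivative is nullable. A minimal Python sketch restricted to regular
expressions; the paper's subject, full context-free grammars,
additionally requires laziness, memoization, and fixed points for
nullability:

class Empty: pass                      # the empty language
class Eps: pass                        # the language {""}
class Chr:
    def __init__(self, c): self.c = c
class Alt:
    def __init__(self, l, r): self.l, self.r = l, r
class Cat:
    def __init__(self, l, r): self.l, self.r = l, r

def nullable(n):
    if isinstance(n, Eps): return True
    if isinstance(n, Alt): return nullable(n.l) or nullable(n.r)
    if isinstance(n, Cat): return nullable(n.l) and nullable(n.r)
    return False

def derive(n, c):
    if isinstance(n, Chr):
        return Eps() if n.c == c else Empty()
    if isinstance(n, Alt):
        return Alt(derive(n.l, c), derive(n.r, c))
    if isinstance(n, Cat):
        d = Cat(derive(n.l, c), n.r)
        return Alt(d, derive(n.r, c)) if nullable(n.l) else d
    return Empty()                     # Empty and Eps have no derivative

def matches(n, s):
    for c in s:
        n = derive(n, c)
    return nullable(n)

ab_or_ac = Alt(Cat(Chr("a"), Chr("b")), Cat(Chr("a"), Chr("c")))
print(matches(ab_or_ac, "ac"))   # -> True
print(matches(ab_or_ac, "ad"))   # -> False
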
@Article{Heule:2016:SSA,
author = "Stefan Heule and Eric Schkufza and Rahul Sharma and
Alex Aiken",
title = "Stratified synthesis: automatically learning the
x86-64 instruction set",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "237--250",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908121",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The x86-64 ISA sits at the bottom of the software
stack of most desktop and server software. Because of
its importance, many software analysis and verification
tools depend, either explicitly or implicitly, on
correct modeling of the semantics of x86-64
instructions. However, formal semantics for the x86-64
ISA are difficult to obtain and often written manually
through great effort. We describe an automatically
synthesized formal semantics of the input/output
behavior for a large fraction of the x86-64 Haswell
ISA's many thousands of instruction variants. The key
to our results is stratified synthesis, where we use a
set of instructions whose semantics are known to
synthesize the semantics of additional instructions
whose semantics are unknown. As the set of formally
described instructions increases, the synthesis
vocabulary expands, making it possible to synthesize
the semantics of increasingly complex instructions.
Using this technique we automatically synthesized
formal semantics for 1,795 instruction variants of the
x86-64 Haswell ISA. We evaluate the learned semantics
against manually written semantics (where available)
and find that they are formally equivalent with the
exception of 50 instructions, where the manually
written semantics contain an error. We further find the
learned formulas to be largely as precise as manually
written ones and of similar size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Eizenberg:2016:ROD,
author = "Ariel Eizenberg and Shiliang Hu and Gilles Pokam and
Joseph Devietti",
title = "{Remix}: online detection and repair of cache
contention for the {JVM}",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "251--265",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908090",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As ever more computation shifts onto multicore
architectures, it is increasingly critical to find
effective ways of dealing with multithreaded
performance bugs like true and false sharing. Previous
approaches to fixing false sharing in unmanaged
languages have employed highly-invasive runtime program
modifications. We observe that managed language
runtimes, with garbage collection and JIT code
compilation, present unique opportunities to repair
such bugs directly, mirroring the techniques used in
manual repairs. We present Remix, a modified version of
the Oracle HotSpot JVM which can detect cache
contention bugs and repair false sharing at runtime.
Remix's detection mechanism leverages recent
performance counter improvements on Intel platforms,
which allow for precise, unobtrusive monitoring of
cache contention at the hardware level. Remix can
detect and repair known false sharing issues in the
LMAX Disruptor high-performance inter-thread messaging
library and the Spring Reactor event-processing
framework, automatically providing 1.5-2x speedups over
unoptimized code and matching the performance of
hand-optimization. Remix also finds a new false sharing
bug in SPECjvm2008, and uncovers a true sharing bug in
the HotSpot JVM that, when fixed, improves the
performance of three NAS Parallel Benchmarks by 7-25x.
Remix incurs no statistically-significant performance
overhead on other benchmarks that do not exhibit cache
contention, making Remix practical for always-on use.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{David:2016:SSB,
author = "Yaniv David and Nimrod Partush and Eran Yahav",
title = "Statistical similarity of binaries",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "266--280",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908126",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We address the problem of finding similar procedures
in stripped binaries. We present a new statistical
approach for measuring the similarity between two
procedures. Our notion of similarity allows us to find
similar code even when it has been compiled using
different compilers, or has been modified. The main
idea is to use similarity by composition: decompose the
code into smaller comparable fragments, define semantic
similarity between fragments, and use statistical
reasoning to lift fragment similarity into similarity
between procedures. We have implemented our approach in
a tool called Esh, and applied it to find various
prominent vulnerabilities across compilers and
versions, including Heartbleed, Shellshock and Venom.
We show that Esh produces high accuracy results, with
few to no false positives --- a crucial factor in the
scenario of vulnerability search in stripped
binaries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Zhang:2016:ABS,
author = "Yizhou Zhang and Guido Salvaneschi and Quinn Beightol
and Barbara Liskov and Andrew C. Myers",
title = "Accepting blame for safe tunneled exceptions",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "281--295",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908086",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Unhandled exceptions crash programs, so a compile-time
check that exceptions are handled should in principle
make software more reliable. But designers of some
recent languages have argued that the benefits of
statically checked exceptions are not worth the costs.
We introduce a new statically checked exception
mechanism that addresses the problems with existing
checked-exception mechanisms. In particular, it
interacts well with higher-order functions and other
design patterns. The key insight is that whether an
exception should be treated as a ``checked'' exception
is not a property of its type but rather of the context
in which the exception propagates. Statically checked
exceptions can ``tunnel'' through code that is
oblivious to their presence, but the type system
nevertheless checks that these exceptions are handled.
Further, exceptions can be tunneled without being
accidentally caught, by expanding the space of
exception identifiers to identify the
exception-handling context. The resulting mechanism is
expressive and syntactically light, and can be
implemented efficiently. We demonstrate the
expressiveness of the mechanism using significant
codebases and evaluate its performance. We have
implemented this new exception mechanism as part of the
new Genus programming language, but the mechanism could
equally well be applied to other programming
languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Kent:2016:OTM,
author = "Andrew M. Kent and David Kempe and Sam
Tobin-Hochstadt",
title = "Occurrence typing modulo theories",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "296--309",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908091",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new type system combining occurrence
typing --- a technique previously used to type check
programs in dynamically-typed languages such as Racket,
Clojure, and JavaScript --- with dependent refinement
types. We demonstrate that the addition of refinement
types allows the integration of arbitrary solver-backed
reasoning about logical propositions from external
theories. By building on occurrence typing, we can add
our enriched type system as a natural extension of
Typed Racket, reusing its core while increasing its
expressiveness. The result is a well-tested type system
with a conservative, decidable core in which types may
depend on a small but extensible set of program terms.
In addition to describing our design, we present the
following: a formal model and proof of correctness; a
strategy for integrating new theories, with specific
examples including linear arithmetic and bitvectors;
and an evaluation in the context of the full Typed
Racket implementation. Specifically, we take safe
vector operations as a case study, examining all vector
accesses in a 56,000-line corpus of Typed Racket
programs. Our system is able to prove that 50\% of
these are safe with no new annotations, and with a few
annotations and modifications we capture more than
70\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
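As a rough Python analogue (hypothetical; the paper's setting is Typed
Racket), the function below shows the two ingredients the abstract
names: a type test that narrows what may be assumed on each branch,
and an arithmetic refinement that an external theory such as linear
arithmetic could discharge to prove the vector access safe.

from typing import Union

def safe_ref(v: list, i: Union[int, str]) -> int:
    if isinstance(i, int):          # occurrence typing: i : int here
        if 0 <= i < len(v):         # refinement: 0 <= i < len(v)
            return v[i]             # access provably in bounds
        raise IndexError(i)
    raise TypeError(i)              # i : str on this path

print(safe_ref([10, 20, 30], 1))    # 20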
@Article{Vekris:2016:RTT,
author = "Panagiotis Vekris and Benjamin Cosman and Ranjit
Jhala",
title = "Refinement types for {TypeScript}",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "310--325",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908110",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Refined TypeScript (RSC), a lightweight
refinement type system for TypeScript, that enables
static verification of higher-order, imperative
programs. We develop a formal system for RSC that
delineates the interaction between refinement types and
mutability, and enables flow-sensitive reasoning by
translating input programs to an equivalent
intermediate SSA form. By establishing type safety for
the intermediate form, we prove safety for the input
programs. Next, we extend the core to account for
imperative and dynamic features of TypeScript,
including overloading, type reflection, ad hoc type
hierarchies and object initialization. Finally, we
evaluate RSC on a set of real-world benchmarks,
including parts of the Octane benchmarks, D3,
Transducers, and the TypeScript compiler. We show how
RSC successfully establishes a number of value
dependent properties, such as the safety of array
accesses and downcasts, while incurring a modest
overhead in type annotations and code restructuring.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
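A hand-worked sketch, with invented names, of the SSA translation the
abstract describes: once every assignment gets a fresh name,
refinements attach to immutable bindings, and flow-sensitive facts
become ordinary facts about distinct variables. The annotations in
comments use RSC-style refinement notation informally.

def example(b: bool) -> int:
    x0 = 0                  # x0 : {v:int | v = 0}
    if b:
        x1 = x0 + 1         # x1 : {v:int | v = 1}
        x2 = x1             # x2 = phi(x1, x0) in SSA form
    else:
        x2 = x0
    return x2               # x2 : {v:int | 0 <= v <= 1}

assert example(True) == 1 and example(False) == 0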
@Article{Smith:2016:MPS,
author = "Calvin Smith and Aws Albarghouthi",
title = "{MapReduce} program synthesis",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "326--340",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908102",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "By abstracting away the complexity of distributed
systems, large-scale data processing platforms
(MapReduce, Hadoop, Spark, Dryad, etc.) have
provided developers with simple means for harnessing
the power of the cloud. In this paper, we ask whether
we can automatically synthesize MapReduce-style
distributed programs from input-output examples. Our
ultimate goal is to enable end users to specify
large-scale data analyses through the simple interface
of examples. We thus present a new algorithm and tool
for synthesizing programs composed of efficient
data-parallel operations that can execute on cloud
computing infrastructure. We evaluate our tool on a
range of real-world big-data analysis tasks and general
computations. Our results demonstrate the efficiency of
our approach and the small number of examples it
requires to synthesize correct, scalable programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
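A toy enumeration, not the paper's algorithm: given input-output
examples, search small compositions of data-parallel components (the
mapper and reducer tables are illustrative) until one agrees with
every example.

from functools import reduce
from itertools import product

MAPPERS = {"square": lambda x: x * x, "double": lambda x: 2 * x,
           "id": lambda x: x}
REDUCERS = {"sum": lambda a, b: a + b, "max": max}

def synthesize(examples):
    for (mn, m), (rn, r) in product(MAPPERS.items(), REDUCERS.items()):
        prog = lambda xs, m=m, r=r: reduce(r, map(m, xs))
        if all(prog(xs) == out for xs, out in examples):
            return f"reduce({rn}, map({mn}, input))", prog
    return None

spec, prog = synthesize([([1, 2, 3], 14), ([2, 2], 8)])
print(spec)        # reduce(sum, map(square, input))
print(prog([3, 4]))  # 25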
@Article{Chugh:2016:PDM,
author = "Ravi Chugh and Brian Hempel and Mitchell Spradlin and
Jacob Albers",
title = "Programmatic and direct manipulation, together at
last",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "341--354",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908103",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Direct manipulation interfaces and programmatic
systems have distinct and complementary strengths. The
former provide intuitive, immediate visual feedback and
enable rapid prototyping, whereas the latter enable
complex, reusable abstractions. Unfortunately, existing
systems typically force users into just one of these
two interaction modes. We present a system called
Sketch-n-Sketch that integrates programmatic and direct
manipulation for the particular domain of Scalable
Vector Graphics (SVG). In Sketch-n-Sketch, the user
writes a program to generate an output SVG canvas. Then
the user may directly manipulate the canvas while the
system immediately infers a program update in order to
match the changes to the output, a workflow we call
live synchronization. To achieve this, we propose (i) a
technique called trace-based program synthesis that
takes program execution history into account in order
to constrain the search space and (ii) heuristics for
dealing with ambiguities. Based on our experience with
examples spanning 2,000 lines of code and from the
results of a preliminary user study, we believe that
Sketch-n-Sketch provides a novel workflow that can
augment traditional programming systems. Our approach
may serve as the basis for live synchronization in
other application domains, as well as a starting point
for yet more ambitious ways of combining programmatic
and direct manipulation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
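A minimal reading of live synchronization, with all names mine: the
program computes output attributes from parameters, execution records
a trace of the expression behind each attribute, and a direct edit of
the output is pushed back by solving that expression for a parameter.
Choosing dx rather than x0 stands in for the paper's ambiguity
heuristics.

params = {"x0": 10, "dx": 30}

def render(p):
    # x-positions of three boxes, plus a trace recording that each
    # output value came from the expression x0 + i*dx.
    out = [p["x0"] + i * p["dx"] for i in range(3)]
    trace = [("x0 + i*dx", i) for i in range(3)]
    return out, trace

out, trace = render(params)
# The user drags box 2 from x = 70 to x = 100. Inverting its trace
# entry gives the constraint x0 + 2*dx = 100; solve for dx.
_, i = trace[2]
params["dx"] = (100 - params["x0"]) // i
print(render(params)[0])    # [10, 55, 100]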
@Article{Loncaric:2016:FSF,
author = "Calvin Loncaric and Emina Torlak and Michael D.
Ernst",
title = "Fast synthesis of fast collections",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "355--368",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908122",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many applications require specialized data structures
not found in the standard libraries, but implementing
new data structures by hand is tedious and error-prone.
This paper presents a novel approach for synthesizing
efficient implementations of complex collection data
structures from high-level specifications that describe
the desired retrieval operations. Our approach handles
a wider range of data structures than previous work,
including structures that maintain an order among their
elements or have complex retrieval methods. We have
prototyped our approach in a data structure synthesizer
called Cozy. Four large, real-world case studies
compare structures generated by Cozy against
handwritten implementations in terms of correctness and
performance. Structures synthesized by Cozy match the
performance of handwritten data structures while
avoiding human error.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{McClurg:2016:EDN,
author = "Jedidiah McClurg and Hossein Hojjat and Nate Foster
and Pavol Cern{\'y}",
title = "Event-driven network programming",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "369--385",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908097",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software-defined networking (SDN) programs must
simultaneously describe static forwarding behavior and
dynamic updates in response to events. Event-driven
updates are critical to get right, but difficult to
implement correctly due to the high degree of
concurrency in networks. Existing SDN platforms offer
weak guarantees that can break application invariants,
leading to problems such as dropped packets, degraded
performance, security violations, etc. This paper
introduces event-driven consistent updates that are
guaranteed to preserve well-defined behaviors when
transitioning between configurations in response to
events. We propose network event structures (NESs) to
model constraints on updates, such as which events can
be enabled simultaneously and which causal dependencies
hold between events. We define an extension of the NetKAT
language with mutable state, give semantics to stateful
programs using NESs, and discuss provably-correct
strategies for implementing NESs in SDNs. Finally, we
evaluate our approach empirically, demonstrating that
it gives well-defined consistency guarantees while
avoiding expensive synchronization and packet
buffering.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Beckett:2016:TN,
author = "Ryan Beckett and Michael Greenberg and David Walker",
title = "Temporal {NetKAT}",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "386--401",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908108",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the past 5--10 years, the rise of software-defined
networking (SDN) has inspired a wide range of new
systems, libraries, hypervisors and languages for
programming, monitoring, and debugging network
behavior. Oftentimes, these systems are disjoint: one
language for programming and another for verification,
and yet another for run-time monitoring and debugging.
In this paper, we present a new, unified framework,
called Temporal NetKAT, capable of facilitating all of
these tasks at once. As its name suggests, Temporal
NetKAT is the synthesis of two formal theories:
past-time (finite trace) linear temporal logic and
(network) Kleene Algebra with Tests. Temporal
predicates allow programmers to write down concise
properties of a packet's path through the network and
to make dynamic packet-forwarding, access control or
debugging decisions on that basis. In addition to being
useful for programming, the combined equational theory
of LTL and NetKAT facilitates proofs of path-based
correctness properties. Using new, general, proof
techniques, we show that the equational semantics is
sound with respect to the denotational semantics, and,
for a class of programs we call network-wide programs,
complete. We have also implemented a compiler for
temporal NetKAT, evaluated its performance on a range
of benchmarks, and studied the effectiveness of several
optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
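The past-time flavor of the temporal predicates is easy to mimic over
a packet's recorded path. The evaluator below is a sketch under my
own encoding, not Temporal NetKAT syntax: previously and since are
the standard past-time LTL connectives, evaluated at a position in
the path.

def previously(p, trace, i):
    # "previously p" holds at i if p held at some position j <= i.
    return any(p(trace[j]) for j in range(i + 1))

def since(p, q, trace, i):
    # "p since q": q held at some j <= i and p held at all k in (j, i].
    return any(q(trace[j]) and
               all(p(trace[k]) for k in range(j + 1, i + 1))
               for j in range(i + 1))

path = ["ingress", "firewall", "core", "egress"]
at = lambda sw: (lambda hop: hop == sw)
# allow a packet at egress only if it previously traversed the firewall
print(previously(at("firewall"), path, len(path) - 1))  # True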
@Article{El-Hassany:2016:SCA,
author = "Ahmed El-Hassany and Jeremie Miserez and Pavol Bielik
and Laurent Vanbever and Martin Vechev",
title = "{SDNRacer}: concurrency analysis for software-defined
networks",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "402--415",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908124",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrency violations are an important source of bugs
in Software-Defined Networks (SDN), often leading to
policy or invariant violations. Unfortunately,
concurrency violations are also notoriously difficult
to avoid, detect and debug. This paper presents a novel
approach and a tool, SDNRacer, for detecting
concurrency violations of SDNs. Our approach is enabled
by three key ingredients: (i) a precise happens-before
model for SDNs that captures when events can happen
concurrently; (ii) a set of sound, domain-specific
filters that reduce reported violations by orders of
magnitude; and (iii) a sound and complete dynamic
analyzer, based on the above, that can ensure the
network is free of harmful errors such as data races
and per-packet incoherence. We evaluated SDNRacer on
several real-world OpenFlow controllers, running both
reactive and proactive applications in large networks.
We show that SDNRacer is practically effective: it
quickly pinpoints harmful concurrency violations
without overwhelming the user with false positives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
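A toy version of the race condition the happens-before model rules
out (the edge set and event encoding are mine): two events conflict
if they touch the same rule with at least one write, and they race
when additionally neither is ordered before the other.

import itertools

events = [(1, "write", "r1"), (2, "read", "r1"), (3, "write", "r2")]
hb = {(1, 3)}    # stand-in happens-before edges (e.g., barrier ordering)

def ordered(a, b):
    return (a, b) in hb or (b, a) in hb

def is_race(e1, e2):
    (i1, op1, r1), (i2, op2, r2) = e1, e2
    return r1 == r2 and "write" in (op1, op2) and not ordered(i1, i2)

for e1, e2 in itertools.combinations(events, 2):
    if is_race(e1, e2):
        print("race:", e1, e2)   # race: (1, 'write', 'r1') (2, 'read', 'r1')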
@Article{Shambaugh:2016:RCV,
author = "Rian Shambaugh and Aaron Weiss and Arjun Guha",
title = "Rehearsal: a configuration verification tool for
{Puppet}",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "416--430",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908083",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large-scale data centers and cloud computing have
turned system configuration into a challenging problem.
Several widely-publicized outages have been blamed not
on software bugs, but on configuration bugs. To cope,
thousands of organizations use system configuration
languages to manage their computing infrastructure. Of
these, Puppet is the most widely used with thousands of
paying customers and many more open-source users. The
heart of Puppet is a domain-specific language that
describes the state of a system. Puppet already
performs some basic static checks, but they only
prevent a narrow range of errors. Furthermore, testing
is ineffective because many errors are only triggered
under specific machine states that are difficult to
predict and reproduce. With several examples, we show
that a key problem with Puppet is that configurations
can be non-deterministic. This paper presents
Rehearsal, a verification tool for Puppet
configurations. Rehearsal implements a sound, complete,
and scalable determinacy analysis for Puppet. To
develop it, we (1) present a formal semantics for
Puppet, (2) use several analyses to shrink our models
to a tractable size, and (3) frame determinism-checking
as decidable formulas for an SMT solver. Rehearsal then
leverages the determinacy analysis to check other
important properties, such as idempotency. Finally, we
apply Rehearsal to several real-world Puppet
configurations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
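A miniature determinacy check in the spirit of the analysis (the
resource model is mine and far simpler than Puppet's): since
resources are applied in an unspecified order, a configuration is
deterministic only if every application order yields the same final
state.

from functools import reduce
from itertools import permutations

def apply_res(state, res):
    # A resource is ("file", path, content); applying it writes the file.
    _, path, content = res
    new = dict(state)
    new[path] = content
    return new

def deterministic(resources, init):
    # Deterministic iff every apply order yields the same final state.
    finals = {tuple(sorted(reduce(apply_res, order, init).items()))
              for order in permutations(resources)}
    return len(finals) == 1

r1 = ("file", "/etc/app.conf", "A")
r2 = ("file", "/etc/app.conf", "B")     # both write the same file
print(deterministic([r1, r2], {}))                       # False
print(deterministic([r1, ("file", "/etc/x", "C")], {}))  # True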
@Article{Chen:2016:TCV,
author = "Hao Chen and Xiongnan (Newman) Wu and Zhong Shao and
Joshua Lockerman and Ronghui Gu",
title = "Toward compositional verification of interruptible
{OS} kernels and device drivers",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "431--447",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908101",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "An operating system (OS) kernel forms the lowest level
of any system software stack. The correctness of the OS
kernel is the basis for the correctness of the entire
system. Recent efforts have demonstrated the
feasibility of building formally verified
general-purpose kernels, but it is unclear how to
extend their work to verify the functional correctness
of device drivers, due to the non-local effects of
interrupts. In this paper, we present a novel
compositional framework for building certified
interruptible OS kernels with device drivers. We
provide a general device model that can be instantiated
with various hardware devices, and a realistic formal
model of interrupts, which can be used to reason about
interruptible code. We have realized this framework in
the Coq proof assistant. To demonstrate the
effectiveness of our new approach, we have successfully
extended an existing verified non-interruptible kernel
with our framework and turned it into an interruptible
kernel with verified device drivers. To the best of our
knowledge, this is the first verified interruptible
operating system with device drivers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Mullen:2016:VPO,
author = "Eric Mullen and Daryl Zuniga and Zachary Tatlock and
Dan Grossman",
title = "Verified peephole optimizations for {CompCert}",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "448--461",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908109",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transformations over assembly code are common in many
compilers. These transformations are also some of the
most bug-dense compiler components. Such bugs could be
eliminated by formally verifying the compiler, but
state-of-the-art formally verified compilers like
CompCert do not support assembly-level program
transformations. This paper presents Peek, a framework
for expressing, verifying, and running
meaning-preserving assembly-level program
transformations in CompCert. Peek contributes four new
components: a lower level semantics for CompCert x86
syntax, a liveness analysis, a library for expressing
and verifying peephole optimizations, and a verified
peephole optimization pass built into CompCert. Each of
these is accompanied by a correctness proof in Coq
against realistic assumptions about the calling
convention and the system memory allocator. Verifying
peephole optimizations in Peek requires proving only a
set of local properties, which we have proved are
sufficient to ensure global transformation correctness.
We have proven these local properties for 28 peephole
transformations from the literature. We discuss the
development of our new assembly semantics, liveness
analysis, representation of program transformations,
and execution engine; describe the verification
challenges of each component; and detail techniques we
applied to mitigate the proof burden.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
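A classic unverified peephole pass, for flavor only (Peek's
contribution is the accompanying Coq proof, which this sketch has
none of): scan the instruction list and drop patterns that preserve
meaning, assuming condition flags are dead at those points, which is
the kind of fact the paper's liveness analysis establishes.

def peephole(prog):
    out = []
    for ins in prog:
        if ins[0] == "mov" and ins[1] == ins[2]:
            continue                 # mov r, r   => drop
        if ins[0] == "add" and ins[2] == 0:
            continue                 # add r, 0   => drop (flags dead)
        out.append(ins)
    return out

prog = [("mov", "rax", "rax"), ("add", "rbx", 0), ("add", "rbx", 1)]
print(peephole(prog))    # [('add', 'rbx', 1)]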
@Article{Ren:2016:JTS,
author = "Brianna M. Ren and Jeffrey S. Foster",
title = "Just-in-time static type checking for dynamic
languages",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "462--476",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908127",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic languages such as Ruby, Python, and JavaScript
have many compelling benefits, but the lack of static
types means subtle errors can remain latent in code for
a long time. While many researchers have developed
various systems to bring some of the benefits of static
types to dynamic languages, prior approaches have
trouble dealing with metaprogramming, which generates
code as the program executes. In this paper, we propose
Hummingbird, a new system that uses a novel technique,
just-in-time static type checking, to type check Ruby
code even in the presence of metaprogramming. In
Hummingbird, method type signatures are gathered
dynamically at run-time, as those methods are created.
When a method is called, Hummingbird statically type
checks the method body against current type signatures.
Thus, Hummingbird provides thorough static checks on a
per-method basis, while also allowing arbitrarily
complex metaprogramming. For performance, Hummingbird
memoizes the static type checking pass, invalidating
cached checks only if necessary. We formalize
Hummingbird using a core, Ruby-like language and prove
it sound. To evaluate Hummingbird, we applied it to six
apps, including three that use Ruby on Rails, a
powerful framework that relies heavily on
metaprogramming. We found that all apps typecheck
successfully using Hummingbird, and that Hummingbird's
performance overhead is reasonable. We applied
Hummingbird to earlier versions of one Rails app and
found several type errors that had been introduced and
then fixed. Lastly, we demonstrate using Hummingbird in
Rails development mode to typecheck an app as live
updates are applied to it.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
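A schematic Python stand-in (Hummingbird itself targets Ruby) for the
just-in-time discipline: signatures are registered dynamically as
methods are defined, a method body is checked against the current
signatures when first called, and the check is memoized until a
signature changes.

sigs, checked = {}, set()

def define(name, sig):
    # Signatures arrive dynamically, as methods are (re)defined.
    sigs[name] = sig
    checked.discard(name)       # invalidate any memoized check

def jit_check(name, body_calls):
    # Statically check the body against *current* signatures, once.
    if name in checked:
        return
    for callee, argtypes in body_calls:
        expected, _ = sigs[callee]
        assert argtypes == expected, f"type error in {name} calling {callee}"
    checked.add(name)

define("area", ([int, int], int))
define("main", ([], int))
jit_check("main", [("area", [int, int])])   # checked on first call...
jit_check("main", [("area", [int, int])])   # ...memoized afterwards
print("main typechecks")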
@Article{Petricek:2016:TDM,
author = "Tomas Petricek and Gustavo Guerra and Don Syme",
title = "Types from data: making structured data first-class
citizens in {F\#}",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "477--490",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908115",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most modern applications interact with external
services and access data in structured formats such as
XML, JSON and CSV. Static type systems do not
understand such formats, often making data access more
cumbersome. Should we give up and leave the messy world
of external data to dynamic typing and runtime checks?
Of course not! We present F\# Data, a library that
integrates external structured data into F\#. As most
real-world data does not come with an explicit schema,
we develop a shape inference algorithm that infers a
shape from representative sample documents. We then
integrate the inferred shape into the F\# type system
using type providers. We formalize the process and
prove a relative type soundness theorem. Our library
significantly reduces the amount of data access code
and it provides additional safety guarantees when
contrasted with the widely used weakly typed
techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
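A toy shape-inference pass, much cruder than the paper's algorithm:
unify sample documents field by field, join conflicting field types
to a top type, and mark a field optional when some sample lacks it.

def infer(samples):
    shape = {}
    for doc in samples:
        for k, v in doc.items():
            t = type(v).__name__
            shape.setdefault(k, {"type": t, "optional": False})
            if shape[k]["type"] != t:
                shape[k]["type"] = "any"   # crude join of conflicting types
    for k in shape:
        if any(k not in doc for doc in samples):
            shape[k]["optional"] = True
    return shape

print(infer([{"name": "Ada", "age": 36}, {"name": "Alan"}]))
# {'name': {'type': 'str', 'optional': False},
#  'age': {'type': 'int', 'optional': True}}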
@Article{Zhu:2016:ALS,
author = "He Zhu and Gustavo Petri and Suresh Jagannathan",
title = "Automatically learning shape specifications",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "491--507",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908125",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a novel automated procedure for
discovering expressive shape specifications for
sophisticated functional data structures. Our approach
extracts potential shape predicates based on the
definition of constructors of arbitrary user-defined
inductive data types, and combines these predicates
within an expressive first-order specification language
using a lightweight data-driven learning procedure.
Notably, this technique requires no programmer
annotations, and is equipped with a type-based decision
procedure to verify the correctness of discovered
specifications. Experimental results indicate that our
implementation is both efficient and effective, capable
of automatically synthesizing sophisticated shape
specifications over a range of complex data types,
going well beyond the scope of existing solutions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Yaghmazadeh:2016:STH,
author = "Navid Yaghmazadeh and Christian Klinger and Isil
Dillig and Swarat Chaudhuri",
title = "Synthesizing transformations on hierarchically
structured data",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "508--521",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908088",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a new approach for synthesizing
transformations on tree-structured data, such as Unix
directories and XML documents. We consider a general
abstraction for such data, called hierarchical data
trees (HDTs) and present a novel example-driven
synthesis algorithm for HDT transformations. Our
central insight is to reduce the problem of
synthesizing tree transformers to the synthesis of list
transformations that are applied to the paths of the
tree. The synthesis problem over lists is solved using
a new algorithm that combines SMT solving and decision
tree learning. We have implemented our technique in a
system called HADES and show that HADES can
automatically synthesize a variety of interesting
transformations collected from online forums.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Polikarpova:2016:PSP,
author = "Nadia Polikarpova and Ivan Kuraj and Armando
Solar-Lezama",
title = "Program synthesis from polymorphic refinement types",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "522--538",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908093",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a method for synthesizing recursive
functions that provably satisfy a given specification
in the form of a polymorphic refinement type. We
observe that such specifications are particularly
suitable for program synthesis for two reasons. First,
they offer a unique combination of expressive power and
decidability, which enables automatic verification (and
hence synthesis) of nontrivial programs. Second, a
type-based specification for a program can often be
effectively decomposed into independent specifications
for its components, causing the synthesizer to consider
fewer component combinations and leading to a
combinatorial reduction in the size of the search
space. At the core of our synthesis procedure is a new
algorithm for refinement type checking, which supports
specification decomposition. We have evaluated our
prototype implementation on a large set of synthesis
problems and found that it exceeds the state of the art
in terms of both scalability and usability. The tool
was able to synthesize more complex programs than those
reported in prior work (several sorting algorithms and
operations on balanced search trees), as well as most
of the benchmarks tackled by existing synthesizers,
often starting from a more concise and intuitive user
input.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Maleki:2016:HOT,
author = "Sepideh Maleki and Annie Yang and Martin Burtscher",
title = "Higher-order and tuple-based massively-parallel prefix
sums",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "539--552",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908089",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Prefix sums are an important parallel primitive,
especially in massively-parallel programs. This paper
discusses two orthogonal generalizations thereof, which
we call higher-order and tuple-based prefix sums.
Moreover, it describes and evaluates SAM, a
GPU-friendly algorithm for computing prefix sums and
other scans that directly supports higher orders and
tuple values. Its templated CUDA implementation unifies
all of these computations in a single 100-statement
kernel. SAM is communication-efficient in the sense
that it minimizes main-memory accesses. When computing
prefix sums of a million or more values, it outperforms
Thrust and CUDPP on both a Titan X and a K40 GPU. On
the Titan X, SAM reaches memory-copy speeds for large
input sizes, which cannot be surpassed. SAM outperforms
CUB, the currently fastest conventional prefix sum
implementation, by up to a factor of 2.9 on
eighth-order prefix sums and by up to a factor of 2.6
on eight-tuple prefix sums.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
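Worked definitions, under one natural reading of the two
generalizations (the paper's exact formulation may differ): an
order-k prefix sum feeds back the output k positions earlier, and a
tuple-based prefix sum scans fixed-size tuples elementwise.

def prefix_sum_order(xs, k=1):
    out = list(xs)
    for i in range(k, len(out)):
        out[i] += out[i - k]    # order-k recurrence: out[i] = x[i] + out[i-k]
    return out

def prefix_sum_tuples(pairs):
    out, acc = [], (0, 0)
    for a, b in pairs:
        acc = (acc[0] + a, acc[1] + b)   # elementwise accumulation
        out.append(acc)
    return out

print(prefix_sum_order([1, 2, 3, 4]))         # [1, 3, 6, 10] (ordinary scan)
print(prefix_sum_order([1, 2, 3, 4], k=2))    # [1, 2, 4, 6]
print(prefix_sum_tuples([(1, 10), (2, 20)]))  # [(1, 10), (3, 30)]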
@Article{Kim:2016:DOF,
author = "Junghyun Kim and Gangwon Jo and Jaehoon Jung and
Jungwon Kim and Jaejin Lee",
title = "A distributed {OpenCL} framework using redundant
computation and data replication",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "553--569",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908094",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Applications written solely in OpenCL or CUDA cannot
execute on a cluster as a whole. Most previous
approaches that extend these programming models to
clusters are based on a common idea: designating a
centralized host node and coordinating the other nodes
with the host for computation. However, the centralized
host node is a serious performance bottleneck when the
number of nodes is large. In this paper, we propose a
scalable and distributed OpenCL framework called
SnuCL-D for large-scale clusters. SnuCL-D's remote
device virtualization provides an OpenCL application
with an illusion that all compute devices in a cluster
are confined in a single node. To reduce the amount of
control-message and data communication between nodes,
SnuCL-D replicates the OpenCL host program execution
and data in each node. We also propose a new OpenCL
host API function and a queueing optimization technique
that significantly reduce the overhead incurred by the
previous centralized approaches. To show the
effectiveness of SnuCL-D, we evaluate SnuCL-D with a
microbenchmark and eleven benchmark applications on a
large-scale CPU cluster and a medium-scale GPU
cluster.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Degenbaev:2016:ITG,
author = "Ulan Degenbaev and Jochen Eisinger and Manfred Ernst
and Ross McIlroy and Hannes Payer",
title = "Idle time garbage collection scheduling",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "570--583",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908106",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Efficient garbage collection is increasingly important
in today's managed language runtime systems that demand
low latency, low memory consumption, and high
throughput. Garbage collection may pause the
application for many milliseconds to identify live
memory, free unused memory, and compact fragmented
regions of memory, even when employing concurrent
garbage collection. In animation-based applications
that require 60 frames per second, these pause times
may be observable, degrading user experience. This
paper introduces idle time garbage collection
scheduling to increase the responsiveness of
applications by hiding expensive garbage collection
operations inside of small, otherwise unused idle
portions of the application's execution, resulting in
smoother animations. Additionally we take advantage of
idleness to reduce memory consumption while allowing
higher memory use when high throughput is required. We
implemented idle time garbage collection scheduling in
V8, an open-source, production JavaScript virtual
machine running within Chrome. We present performance
results on various benchmarks running popular webpages
and show that idle time garbage collection scheduling
can significantly improve latency and memory
consumption. Furthermore, we introduce a new metric
called frame time discrepancy to quantify the quality
of the user experience and precisely measure the
improvements that idle time garbage collection provides
for a WebGL-based game benchmark. Idle time garbage
collection is shipped and enabled by default in
Chrome.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
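The scheduling idea reduces to a simple admission test, sketched
below with invented numbers: given the idle window predicted before
the next frame deadline, run a garbage collection step only if its
estimated duration fits the remaining idle time.

FRAME_MS = 1000 / 60    # budget per frame at 60 fps

def schedule(idle_ms, gc_steps):
    done, used = [], 0.0
    for name, est_ms in gc_steps:
        if used + est_ms <= idle_ms:   # fits in this frame's idle window
            used += est_ms
            done.append(name)
    return done, idle_ms - used

steps = [("incremental-mark", 2.0), ("sweep-chunk", 1.5), ("compact", 6.0)]
print(schedule(4.0, steps))   # (['incremental-mark', 'sweep-chunk'], 0.5)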
@Article{Jacek:2016:ALP,
author = "Nicholas Jacek and Meng-Chieh Chiu and Benjamin Marlin
and Eliot Moss",
title = "Assessing the limits of program-specific garbage
collection performance",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "584--598",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908120",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We consider the ultimate limits of program-specific
garbage collector performance for real programs. We
first characterize the GC schedule optimization problem
using Markov Decision Processes (MDPs). Based on this
characterization, we develop a method of determining,
for a given program run and heap size, an optimal
schedule of collections for a non-generational
collector. We further explore the limits of performance
of a generational collector, where it is not feasible
to search the space of schedules to prove optimality.
Still, we show significant improvements with Least
Squares Policy Iteration, a reinforcement learning
technique for solving MDPs. We demonstrate that there
is considerable promise to reduce garbage collection
costs by developing program-specific collection
policies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
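For a fixed program trace, the non-generational schedule question
becomes a small optimization problem. The dynamic program below
illustrates that framing (it is not the paper's MDP machinery, and
the numbers are invented): collecting before a step costs the live
data traced, and the goal is the cheapest schedule that never
exceeds the heap limit.

alloc = [2, 3, 2, 4, 1]   # bytes allocated at each step
live  = [1, 2, 2, 3, 2]   # live bytes if a collection runs before that step
LIMIT = 8
memo = {}

def best(i, used):
    # Minimum total collection cost for steps i.. with `used` bytes on heap.
    if i == len(alloc):
        return 0
    if (i, used) in memo:
        return memo[(i, used)]
    opts = []
    if used + alloc[i] <= LIMIT:          # allocate without collecting
        opts.append(best(i + 1, used + alloc[i]))
    if live[i] + alloc[i] <= LIMIT:       # collect first: cost = live data
        opts.append(live[i] + best(i + 1, live[i] + alloc[i]))
    memo[(i, used)] = min(opts)
    return memo[(i, used)]

print(best(0, 0))    # 3: a single collection before step 3 is optimal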
@Article{vGleissenthall:2016:CUQ,
author = "Klaus v. Gleissenthall and Nikolaj Bj{\o}rner and
Andrey Rybalchenko",
title = "Cardinalities and universal quantifiers for verifying
parameterized systems",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "599--613",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908129",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parallel and distributed systems rely on intricate
protocols to manage shared resources and synchronize,
i.e., to manage how many processes are in a particular
state. Effective verification of such systems requires
universal quantification to reason about parameterized
state, together with cardinalities that track sets of
processes, messages, and failures, to adequately capture
protocol logic. In this paper we present Tool, an
automatic invariant synthesis method that integrates
cardinality-based reasoning and universal
quantification. The resulting increase of
expressiveness allows Tool to verify, for the first
time, a representative collection of intricate
parameterized protocols.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Padon:2016:ISV,
author = "Oded Padon and Kenneth L. McMillan and Aurojit Panda
and Mooly Sagiv and Sharon Shoham",
title = "{Ivy}: safety verification by interactive
generalization",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "614--630",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908118",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite several decades of research, the problem of
formal verification of infinite-state systems has
resisted effective automation. We describe a system ---
Ivy --- for interactively verifying safety of
infinite-state systems. Ivy's key principle is that
whenever verification fails, Ivy graphically displays a
concrete counterexample to induction. The user then
interactively guides generalization from this
counterexample. This process continues until an
inductive invariant is found. Ivy searches for
universally quantified invariants, and uses a
restricted modeling language. This ensures that all
verification conditions can be checked algorithmically.
All user interactions are performed using graphical
models, easing the user's task. We describe our initial
experience with verifying several distributed
protocols.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Yang:2016:PDI,
author = "Jean Yang and Travis Hance and Thomas H. Austin and
Armando Solar-Lezama and Cormac Flanagan and Stephen
Chong",
title = "Precise, dynamic information flow for database-backed
applications",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "631--647",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908098",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an approach for dynamic information flow
control across the application and database. Our
approach reduces the amount of policy code required,
yields formal guarantees across the application and
database, works with existing relational database
implementations, and scales for realistic applications.
In this paper, we present a programming model that
factors out information flow policies from application
code and database queries, a dynamic semantics for the
underlying $\lambda^{JDB}$ core language, and proofs of
termination-insensitive non-interference and policy
compliance for the semantics. We implement these ideas
in Jacqueline, a Python web framework, and demonstrate
feasibility through three application case studies: a
course manager, a health record system, and a
conference management system used to run an academic
workshop. We show that in comparison to traditional
applications with hand-coded policy checks, Jacqueline
applications have (1) a smaller trusted computing base,
(2) fewer lines of policy code, and (3) reasonable,
often negligible, additional overheads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
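The mechanism underneath policy-agnostic information flow is often
explained with faceted values; the sketch below uses my own names,
not Jacqueline's API: a sensitive field carries a private and a
public facet, and the policy decides at output time which facet a
viewer observes.

class Faceted:
    def __init__(self, private, public, policy):
        self.private, self.public, self.policy = private, public, policy
    def observe(self, viewer):
        # The policy, not the application code, picks the visible facet.
        return self.private if self.policy(viewer) else self.public

gpa = Faceted(3.9, "hidden", policy=lambda viewer: viewer == "registrar")
print(gpa.observe("registrar"))   # 3.9
print(gpa.observe("stranger"))    # hidden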
@Article{Costanzo:2016:EEV,
author = "David Costanzo and Zhong Shao and Ronghui Gu",
title = "End-to-end verification of information-flow security
for {C} and assembly programs",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "648--664",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908100",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Protecting the confidentiality of information
manipulated by a computing system is one of the most
important challenges facing today's cybersecurity
community. A promising step toward conquering this
challenge is to formally verify that the end-to-end
behavior of the computing system really satisfies
various information-flow policies. Unfortunately,
because today's system software still consists of both
C and assembly programs, the end-to-end verification
necessarily requires that we not only prove the
security properties of individual components, but also
carefully preserve these properties through compilation
and cross-language linking. In this paper, we present a
novel methodology for formally verifying end-to-end
security of a software system that consists of both C
and assembly programs. We introduce a general
definition of observation function that unifies the
concepts of policy specification, state
indistinguishability, and whole-execution behaviors. We
show how to use different observation functions for
different levels of abstraction, and how to link
different security proofs across abstraction levels
using a special kind of simulation that is guaranteed
to preserve state indistinguishability. To demonstrate
the effectiveness of our new methodology, we have
successfully constructed an end-to-end security proof,
fully formalized in the Coq proof assistant, of a
nontrivial operating system kernel (running on an
extended CompCert x86 assembly machine model). Some
parts of the kernel are written in C and some are
written in assembly; we verify all of the code,
regardless of language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Sinha:2016:DVM,
author = "Rohit Sinha and Manuel Costa and Akash Lal and Nuno P.
Lopes and Sriram Rajamani and Sanjit A. Seshia and
Kapil Vaswani",
title = "A design and verification methodology for secure
isolated regions",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "665--681",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908113",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hardware support for isolated execution (such as Intel
SGX) enables development of applications that keep
their code and data confidential even while running in
a hostile or compromised host. However, automatically
verifying that such applications satisfy
confidentiality remains challenging. We present a
methodology for designing such applications in a way
that enables certifying their confidentiality. Our
methodology consists of forcing the application to
communicate with the external world through a narrow
interface, compiling it with runtime checks that aid
verification, and linking it with a small runtime that
implements the narrow interface. The runtime includes
services such as secure communication channels and
memory management. We formalize this restriction on the
application as Information Release Confinement (IRC),
and we show that it allows us to decompose the task of
proving confidentiality into (a) one-time,
human-assisted functional verification of the runtime
to ensure that it does not leak secrets, (b) automatic
verification of the application's machine code to
ensure that it satisfies IRC and does not directly read
or corrupt the runtime's internal state. We present
/CONFIDENTIAL: a verifier for IRC that is modular,
automatic, and keeps our compiler out of the trusted
computing base. Our evaluation suggests that the
methodology scales to real-world applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Spiegelman:2016:TDS,
author = "Alexander Spiegelman and Guy Golan-Gueta and Idit
Keidar",
title = "Transactional data structure libraries",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "682--696",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908112",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce transactions into libraries of concurrent
data structures; such transactions can be used to
ensure atomicity of sequences of data structure
operations. By focusing on transactional access to a
well-defined set of data structure operations, we
strike a balance between the ease-of-programming of
transactions and the efficiency of custom-tailored data
structures. We exemplify this concept by designing and
implementing a library supporting transactions on any
number of maps, sets (implemented as skiplists), and
queues. Our library offers efficient and scalable
transactions, which are an order of magnitude faster
than state-of-the-art transactional memory toolkits.
Moreover, our approach treats stand-alone data
structure operations (like put and enqueue) as first
class citizens, and allows them to execute with
virtually no overhead, at the speed of the original
data structure library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
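A coarse undo-log sketch (the paper's library is far more subtle and
scalable): each operation applies immediately and records its
inverse, so aborting a transaction rolls back a whole sequence of map
and queue operations atomically.

class Txn:
    def __init__(self):
        self.undo = []

    def map_put(self, m, k, v):
        old = m.get(k)
        if old is None:
            self.undo.append(lambda: m.pop(k))
        else:
            self.undo.append(lambda: m.__setitem__(k, old))
        m[k] = v

    def enqueue(self, q, v):
        q.append(v)
        self.undo.append(q.pop)

    def abort(self):
        # Run recorded inverses in reverse order.
        for action in reversed(self.undo):
            action()

m, q = {}, []
t = Txn()
t.map_put(m, "job", 1)
t.enqueue(q, "job")
t.abort()
print(m, q)    # {} []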
@Article{Baghsorkhi:2016:FAV,
author = "Sara S. Baghsorkhi and Nalini Vasudevan and Youfeng
Wu",
title = "{FlexVec}: auto-vectorization for irregular loops",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "697--710",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908111",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Traditional vectorization techniques build a
dependence graph with distance and direction
information to determine whether a loop is
vectorizable. Since vectorization reorders the
execution of instructions across iterations, in general
instructions involved in a strongly connected component
(SCC) are deemed not vectorizable unless the SCC can be
eliminated using techniques such as scalar expansion or
privatization. Therefore, traditional vectorization
techniques are limited in their ability to efficiently
handle loops with dynamic cross-iteration dependencies
or complex control flow interweaved within the
dependence cycles. When potential dependencies do not
occur very often, the end result is underutilization of
the SIMD hardware. In this paper, we propose the FlexVec
architecture, which combines new vector instructions
with novel code generation techniques to dynamically
adjust the vector length for loop statements affected by
cross-iteration dependencies that arise at runtime. We
have designed and implemented FlexVec's new ISA as
extensions to the recently released AVX-512 ISA. We
have evaluated the performance improvements enabled by
FlexVec vectorization for 11 C/C++ SPEC 2006 benchmarks
and 7 real applications with AVX-512 vectorization as
baseline. We show that FlexVec vectorization technique
produces a Geomean speedup of 9\% for SPEC 2006 and a
Geomean speedup of 11\% for 7 real applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
@Article{Kamil:2016:VLS,
author = "Shoaib Kamil and Alvin Cheung and Shachar Itzhaky and
Armando Solar-Lezama",
title = "Verified lifting of stencil computations",
journal = j-SIGPLAN,
volume = "51",
number = "6",
pages = "711--726",
month = jun,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2980983.2908117",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Sep 5 07:32:25 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper demonstrates a novel combination of program
synthesis and verification to lift stencil computations
from low-level Fortran code to a high-level summary
expressed using a predicate language. The technique is
sound and mostly automated, and leverages
counter-example guided inductive synthesis (CEGIS) to
find provably correct translations. Lifting existing
code to a high-performance description language has a
number of benefits, including maintainability and
performance portability. For example, our experiments
show that the lifted summaries can enable domain
specific compilers to do a better job of
parallelization as compared to an off-the-shelf
compiler working on the original code, and can even
support fully automatic migration to hardware
accelerators such as GPUs. We have implemented verified
lifting in a system called STNG and have evaluated it
using microbenchmarks, mini-apps, and real-world
applications. We demonstrate the benefits of verified
lifting by first automatically summarizing Fortran
source code into a high-level predicate language, and
subsequently translating the lifted summaries into
Halide, with the translated code achieving median
performance speedups of 4.1X and up to 24X for
non-trivial stencils as compared to the original
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '16 conference proceedings.",
}
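The lifting relation is easy to state concretely (framing mine; the
paper lifts Fortran and proves equivalence via CEGIS rather than by
testing): the low-level loop and the one-line summary below compute
the same function, and the assert is where a verifier would stand.

def stencil_loop(a):
    out = a[:]
    for i in range(1, len(a) - 1):
        out[i] = (a[i - 1] + a[i] + a[i + 1]) / 3.0
    return out

def lifted_summary(a):
    # out[i] = avg(a[i-1..i+1]) for interior i; boundaries copied.
    return [a[i] if i in (0, len(a) - 1)
            else sum(a[i - 1:i + 2]) / 3.0 for i in range(len(a))]

xs = [1.0, 2.0, 4.0, 8.0, 16.0]
assert stencil_loop(xs) == lifted_summary(xs)
print(lifted_summary(xs))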
@Article{Chen:2017:BDA,
author = "Yunji Chen",
title = "Big Data Analytics and Intelligence at {Alibaba
Cloud}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "1--1",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037699",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As China's largest cloud service provider, Alibaba
Cloud has been one of the fastest growing cloud
computing platforms in the world. In this talk, I'll
present an overview of Big Data and AI computing
platform at Alibaba Cloud, which consists of a wide
range of products and services to enable fast and
efficient big data development and intelligent
analysis. The underlying computing infrastructure
supports a variety of computation scenarios, including
batch, interactive, stream, and graph computation, as
well as large-scale machine learning on heterogeneous
cloud-scale data centers. Several big data products,
such as rule-based engine, recommendation system, BI
tools, etc., are provided to address different business
needs. The platform not only supports Alibaba's
internal businesses but also provides solid services to
enterprise customers. In addition, I'll describe key
techniques and system internals, and outline
outstanding research and engineering challenges.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Cherupalli:2017:DAS,
author = "Hari Cherupalli and Henry Duwe and Weidong Ye and
Rakesh Kumar and John Sartori",
title = "Determining Application-specific Peak Power and Energy
Requirements for Ultra-low Power Processors",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "3--16",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037711",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many emerging applications such as IoT, wearables,
implantables, and sensor networks are power- and
energy-constrained. These applications rely on
ultra-low-power processors that have rapidly become the
most abundant type of processor manufactured today. In
the ultra-low-power embedded systems used by these
applications, peak power and energy requirements are
the primary factors that determine critical system
characteristics, such as size, weight, cost, and
lifetime. While the power and energy requirements of
these systems tend to be application-specific,
conventional techniques for rating peak power and
energy cannot accurately bound the power and energy
requirements of an application running on a processor,
leading to over-provisioning that increases system size
and weight. In this paper, we present an automated
technique that performs hardware-software co-analysis
of the application and ultra-low-power processor in an
embedded system to determine application-specific peak
power and energy requirements. Our technique provides
more accurate, tighter bounds than conventional
techniques for determining peak power and energy
requirements, reporting 15\% lower peak power and 17\%
lower peak energy, on average, than a conventional
approach based on profiling and guardbanding. Compared
to an aggressive stressmark-based approach, our
technique reports power and energy bounds that are 26\%
and 26\% lower, respectively, on average. Also, unlike
conventional approaches, our technique reports
guaranteed bounds on peak power and energy independent
of an application's input set. Tighter bounds on peak
power and energy can be exploited to reduce system
size, weight, and cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Chen:2017:PPQ,
author = "Quan Chen and Hailong Yang and Minyi Guo and Ram
Srivatsa Kannan and Jason Mars and Lingjia Tang",
title = "{Prophet}: Precise {QoS} Prediction on Non-Preemptive
Accelerators to Improve Utilization in Warehouse-Scale
Computers",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "17--32",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037700",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Guaranteeing Quality-of-Service (QoS) of
latency-sensitive applications while improving server
utilization through application co-location is
important yet challenging in modern datacenters. The
key challenge is that when applications are co-located
on a server, performance interference due to resource
contention can be detrimental to the application QoS.
Although prior work has proposed techniques to identify
``safe'' co-locations where application QoS is
satisfied by predicting the performance interference on
multicores, no such prediction technique exists for
accelerators such as GPUs. In this work, we present
Prophet, an approach to precisely predict the
performance degradation of latency-sensitive
applications on accelerators due to application
co-location. We analyzed the performance interference
on accelerators through a real system investigation and
found that unlike on multicores where the key
contentious resources are shared caches and main memory
bandwidth, the key contentious resources on
accelerators are instead processing elements,
accelerator memory bandwidth and PCIe bandwidth. Based
on this observation, we designed interference models
that enable the precise prediction for processing
element, accelerator memory bandwidth and PCIe
bandwidth contention on real hardware. By using a novel
technique to forecast solo-run execution traces of the
co-located applications using interference models,
Prophet can accurately predict the performance
degradation of latency-sensitive applications on
non-preemptive accelerators. Using Prophet, we can
identify ``safe'' co-locations on accelerators to
improve utilization without violating the QoS target.
Our evaluation shows that Prophet can predict the
performance degradation with an average prediction
error of 5.47\% on real systems. Meanwhile, based on the
prediction, Prophet achieves accelerator utilization
improvements of 49.9\% on average while maintaining the
QoS target of latency-sensitive applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
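
The Prophet entry above attributes accelerator co-location slowdown to
contention on processing elements, accelerator memory bandwidth, and
PCIe bandwidth. A toy Python sketch of an additive interference model
over those three resources, not Prophet's actual models; every
coefficient and demand figure below is invented for illustration.

def predicted_slowdown(lc, best_effort):
    """lc, best_effort: each job's demand as a fraction of device peak."""
    pe = lc["pe"] + best_effort["pe"]          # processing elements
    membw = lc["membw"] + best_effort["membw"]  # device memory bandwidth
    pcie = lc["pcie"] + best_effort["pcie"]     # PCIe bandwidth
    slow = 1.0
    if pe > 1.0:                  # PEs oversubscribed: queueing delay
        slow += 0.5 * (pe - 1.0)
    if membw > 1.0:               # memory bandwidth saturated
        slow += 0.8 * (membw - 1.0)
    if pcie > 1.0:                # PCIe transfers serialized
        slow += 0.6 * (pcie - 1.0)
    return slow

lc = {"pe": 0.6, "membw": 0.5, "pcie": 0.4}
for be in ({"pe": 0.3, "membw": 0.3, "pcie": 0.2},
           {"pe": 0.7, "membw": 0.8, "pcie": 0.9}):
    s = predicted_slowdown(lc, be)
    print("co-location is", "safe" if s < 1.1 else "unsafe", round(s, 2))
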
@Article{Kanev:2017:MAM,
author = "Svilen Kanev and Sam Likun Xi and Gu-Yeon Wei and
David Brooks",
title = "{Mallacc}: Accelerating Memory Allocation",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "33--45",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037736",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent work shows that dynamic memory allocation
consumes nearly 7\% of all cycles in Google
datacenters. With the trend towards increased
specialization of hardware, we propose Mallacc, an
in-core hardware accelerator designed for broad use
across a number of high-performance, modern memory
allocators. The design of Mallacc is quite different
from that of traditional throughput-oriented hardware
accelerators. Because memory allocation requests tend
to be very frequent, fast, and interspersed inside
other application code, accelerators must be optimized
for latency rather than throughput, and area overheads
must be kept to a bare minimum. Mallacc accelerates the
three primary operations of a typical memory allocation
request: size class computation, retrieval of a free
memory block, and sampling of memory usage. Our results
show that malloc latency can be reduced by up to 50\%
with a hardware cost of less than 1500 $\mu$m$^2$ of silicon
area, less than 0.006\% of a typical high-performance
processor core.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
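
The Mallacc entry above names size-class computation as one of the
three accelerated malloc operations. A minimal Python sketch of the
software baseline being accelerated, assuming an illustrative class
table rather than any real allocator's.

import bisect

# Illustrative size-class table, not tcmalloc's real one.
SIZE_CLASSES = [8, 16, 32, 48, 64, 96, 128, 192, 256]  # bytes, ascending

def size_class(request_bytes):
    """Return (class index, allocated size) for a malloc request."""
    i = bisect.bisect_left(SIZE_CLASSES, request_bytes)
    if i == len(SIZE_CLASSES):
        raise ValueError("large allocation: handled outside size classes")
    return i, SIZE_CLASSES[i]

print(size_class(1))    # -> (0, 8)
print(size_class(33))   # -> (3, 48)   rounds up to the next class
print(size_class(128))  # -> (6, 128)  exact fit
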
@Article{Wen:2017:REV,
author = "Shasha Wen and Milind Chabbi and Xu Liu",
title = "{REDSPY}: Exploring Value Locality in Software",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "47--61",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037729",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Complex code bases with several layers of abstractions
have abundant inefficiencies that affect the execution
time. Value redundancy is a kind of inefficiency where
the same values are repeatedly computed, stored, or
retrieved over the course of execution. Not all
redundancies can be easily detected or eliminated with
compiler optimization passes due to the inherent
limitations of static analysis. Microscopic
observation of whole executions at instruction- and
operand-level granularity breaks down abstractions and
helps recognize redundancies that masquerade in complex
programs. We have developed REDSPY---a fine-grained
profiler to pinpoint and quantify redundant operations
in program executions. Value redundancy may happen over
time at the same location or in adjacent locations, and
thus it has temporal and spatial locality. REDSPY
identifies both temporal and spatial value locality.
Furthermore, REDSPY is capable of identifying values
that are approximately the same, enabling optimization
opportunities in HPC codes that often use floating
point computations. REDSPY provides intuitive
optimization guidance by apportioning redundancies to
their provenance---source lines and execution calling
contexts. REDSPY pinpointed a dramatically high volume of
redundancies in programs that have been optimization
targets for decades, such as the SPEC CPU2006 suite, the
Rodinia benchmarks, and NWChem---a production computational
chemistry code. Guided by REDSPY, we were able to
eliminate redundancies that resulted in significant
speedups.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
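
The REDSPY entry above describes detecting temporal value locality,
including values that are only approximately equal, as in floating-point
HPC codes. A minimal Python sketch of that idea over a hypothetical
store trace: shadow each address with its last value and count stores
that rewrite (approximately) the same value.

import math

def count_redundant_stores(trace, rel_tol=1e-9):
    """trace: iterable of (address, value) store events; the trace
    format and tolerance are assumptions for illustration."""
    shadow, redundant = {}, 0
    for addr, val in trace:
        old = shadow.get(addr)
        if old is not None and math.isclose(old, val, rel_tol=rel_tol):
            redundant += 1            # a value-redundant (silent) store
        shadow[addr] = val
    return redundant

trace = [(0x10, 1.0), (0x10, 1.0), (0x18, 2.0), (0x10, 1.0 + 1e-12)]
print(count_redundant_stores(trace))  # -> 2 (one exact, one approximate)
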
@Article{Bhattacharjee:2017:TTP,
author = "Abhishek Bhattacharjee",
title = "Translation-Triggered Prefetching",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "63--76",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037705",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose translation-enabled memory prefetching
optimizations, or TEMPO, a low-overhead hardware
mechanism to boost memory performance by exploiting the
operating system's (OS) virtual memory subsystem. We
are the first to make the following observations: (1) a
substantial fraction (20--40\%) of DRAM references in
modern big-data workloads are devoted to accessing page
tables; and (2) when memory references require page
table lookups in DRAM, the vast majority of them
(98\%+) also look up DRAM for the subsequent data
access. TEMPO exploits these observations to enable
DRAM row-buffer and on-chip cache prefetching of the
data that page tables point to. TEMPO requires trivial
changes to the memory controller (under 3\% additional
area), no OS or application changes, and improves
performance by 10--30\% while reducing energy by 1--14\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
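
The TEMPO entry above rests on the observation that a page-table lookup
in DRAM is almost always followed by a DRAM access to the data the PTE
points to, so the data line can be prefetched as soon as the PTE is
read. A toy Python simulation of that trigger, with an invented TLB,
page table, and line size, showing only where the prefetch would fire.

PAGE = 4096

class TinyTLB:
    def __init__(self):
        self.map = {}          # virtual page number -> physical page number
        self.prefetched = set()

    def translate(self, vaddr, page_table):
        vpn = vaddr // PAGE
        if vpn not in self.map:                 # TLB miss: walk in DRAM
            ppn = page_table[vpn]
            self.map[vpn] = ppn
            line = (ppn * PAGE + vaddr % PAGE) // 64
            self.prefetched.add(line)           # TEMPO-style data prefetch
        return self.map[vpn] * PAGE + vaddr % PAGE

tlb, pt = TinyTLB(), {0: 7, 1: 3}
pa = tlb.translate(4200, pt)              # miss -> walk, then prefetch
print(pa, (pa // 64) in tlb.prefetched)   # data line fetched with the PTE
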
@Article{Kim:2017:TAA,
author = "Channoh Kim and Jaehyeok Kim and Sungmin Kim and
Dooyoung Kim and Namho Kim and Gitae Na and Young H. Oh
and Hyeon Gyu Cho and Jae W. Lee",
title = "Typed Architectures: Architectural Support for
Lightweight Scripting",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "77--90",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037726",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic scripting languages are becoming more and more
widely adopted not only for fast prototyping but also
for developing production-grade applications. They
provide high-productivity programming environments
featuring high levels of abstraction with powerful
built-in functions, automatic memory management,
an object-oriented programming paradigm, and dynamic
typing. However, their flexible, dynamic type systems
easily become the source of inefficiency in terms of
instruction count, memory footprint, and energy
consumption. This overhead makes it challenging to
deploy these high-productivity programming technologies
on emerging single-board computers for IoT
applications. Addressing this challenge, this paper
introduces Typed Architectures, a high-efficiency,
low-cost execution substrate for dynamic scripting
languages, where each data variable retains high-level
type information at an ISA level. Typed Architectures
calculate and check the dynamic type of each variable
implicitly in hardware, rather than explicitly in
software, hence significantly reducing instruction
count for dynamic type checking. In addition, Typed
Architectures introduce polymorphic instructions (e.g.,
xadd), which are bound to the correct native
instruction at runtime within the pipeline (e.g., add
or fadd) to efficiently implement polymorphic
operators. Finally, Typed Architectures provide
hardware support for flexible yet efficient type tag
extraction and insertion, capturing common data layout
patterns of tag-value pairs. Our evaluation using a
fully synthesizable RISC-V RTL design on FPGA shows
that Typed Architectures achieve geomean speedups of
11.2\% and 9.9\% with maximum speedups of 32.6\% and
43.5\% for two production-grade scripting engines for
JavaScript and Lua, respectively. Moreover, Typed
Architectures improve the energy-delay product (EDP) by
19.3\% for JavaScript and 16.5\% for Lua with an area
overhead of 1.6\% at a 40nm technology node.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
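
The Typed Architectures entry above describes polymorphic instructions
such as xadd that are bound to the correct native operation (add or
fadd) from operand type tags at runtime. A minimal Python sketch of
that dispatch over tagged values; the tag set and the string case are
illustrative assumptions, not the paper's ISA.

INT, FLT, STR = "int", "float", "str"

def xadd(a, b):
    """a, b: (tag, payload) pairs; dispatch on tags as the pipeline would."""
    (ta, va), (tb, vb) = a, b
    if ta == tb == INT:
        return (INT, va + vb)                  # binds to integer add
    if {ta, tb} <= {INT, FLT}:
        return (FLT, float(va) + float(vb))    # binds to floating-point fadd
    if ta == tb == STR:
        return (STR, va + vb)                  # scripting-style concatenation
    raise TypeError("xadd undefined for tags %s, %s" % (ta, tb))

print(xadd((INT, 2), (INT, 3)))      # ('int', 5)
print(xadd((INT, 2), (FLT, 0.5)))    # ('float', 2.5)
print(xadd((STR, "a"), (STR, "b")))  # ('str', 'ab')
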
@Article{Seo:2017:FAS,
author = "Jihye Seo and Wook-Hee Kim and Woongki Baek and
Beomseok Nam and Sam H. Noh",
title = "Failure-Atomic Slotted Paging for Persistent Memory",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "91--104",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037737",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The slotted-page structure is a database page format
commonly used for managing variable-length records. In
this work, we develop a novel ``failure-atomic slotted
page structure'' for persistent memory that leverages
byte addressability and durability of persistent memory
to minimize redundant write operations used to maintain
consistency in traditional database systems.
Failure-atomic slotted paging consists of two key
elements: (i) in-place commit per page using hardware
transactional memory and (ii) slot header logging that
logs the commit mark of each page. The proposed scheme
is implemented in SQLite and compared against NVWAL,
the current state-of-the-art scheme. Our performance
study shows that our failure-atomic slotted paging
achieves optimal performance for database transactions
that insert a single record. For transactions that
touch more than one database page, our proposed
slot-header logging scheme minimizes the logging
overhead by avoiding duplicating pages and logging only
the metadata of the dirty pages. Overall, we find that
our failure-atomic slotted-page management scheme
reduces database logging overhead to 1/6 and improves
query response time by up to 33\% compared to NVWAL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Nguyen:2017:WSP,
author = "Donald Nguyen and Keshav Pingali",
title = "What Scalable Programs Need from Transactional
Memory",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "105--118",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037750",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactional memory (TM) has been the focus of
numerous studies, and it is supported in processors
such as the IBM Blue Gene/Q and Intel Haswell. Many
studies have used the STAMP benchmark suite to evaluate
their designs. However, the speedups obtained for the
STAMP benchmarks on all TM systems we know of are quite
limited; for example, with 64 threads on the IBM Blue
Gene/Q, we observe a median speedup of 1.4X using the
Blue Gene/Q hardware transactional memory (HTM), and a
median speedup of 4.1X using a software transactional
memory (STM). What limits the performance of these
benchmarks on TMs? In this paper, we argue that the
problem lies with the programming model and data
structures used to write them. To make this point, we
articulate two principles that we believe must be
embodied in any scalable program and argue that STAMP
programs violate both of them. By modifying the STAMP
programs to satisfy both principles, we produce a new
set of programs that we call the Stampede suite. Its
median speedup on the Blue Gene/Q is 8.0X when using an
STM. The two principles also permit us to simplify the
TM design. Using this new STM with the Stampede
benchmarks, we obtain a median speedup of 17.7X with 64
threads on the Blue Gene/Q and 13.2X with 32 threads on
an Intel Westmere system. These results suggest that
HTM and STM designs will benefit if more attention is
paid to the division of labor between application
programs, systems software, and hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Trippel:2017:TMM,
author = "Caroline Trippel and Yatin A. Manerkar and Daniel
Lustig and Michael Pellauer and Margaret Martonosi",
title = "{TriCheck}: Memory Model Verification at the
Trisection of Software, Hardware, and {ISA}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "119--133",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037719",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract =     "Memory consistency models (MCMs), which govern
inter-module interactions in a shared memory system,
are a significant, yet often under-appreciated, aspect
of system design. MCMs are defined at the various
layers of the hardware-software stack, requiring
thoroughly verified specifications, compilers, and
implementations at the interfaces between layers.
Current verification techniques evaluate segments of
the system stack in isolation, such as proving compiler
mappings from a high-level language (HLL) to an ISA or
proving validity of a microarchitectural implementation
of an ISA. This paper makes a case for full-stack MCM
verification and provides a toolflow, TriCheck, capable
of verifying that the HLL, compiler, ISA, and
implementation collectively uphold MCM requirements.
The work showcases TriCheck's ability to evaluate a
proposed ISA MCM in order to ensure that each layer and
each mapping is correct and complete. Specifically, we
apply TriCheck to the open source RISC-V ISA [55],
seeking to verify accurate, efficient, and legal
compilations from C11. We uncover under-specifications
and potential inefficiencies in the current RISC-V ISA
documentation and identify possible solutions for each.
As an example, we find that a RISC-V-compliant
microarchitecture allows 144 outcomes forbidden by C11
to be observed out of 1,701 litmus tests examined.
Overall, this paper demonstrates the necessity of
full-stack verification for detecting MCM-related bugs
in the hardware-software stack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
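
The TriCheck entry above evaluates memory-model requirements with
litmus tests. A small Python sketch of the kind of check one such test
encodes: enumerate every sequentially consistent interleaving of a
message-passing test and confirm the forbidden outcome r1=1, r2=0 never
arises; a layer that admits it without the right fences is what
full-stack verification flags. The encoding below is illustrative, not
TriCheck's.

from itertools import permutations

T0 = [("st", "x", 1), ("st", "y", 1)]        # producer thread
T1 = [("ld", "y", "r1"), ("ld", "x", "r2")]  # consumer thread

def outcomes():
    results = set()
    # Each interleaving merges the two programs in per-thread order.
    for order in set(permutations([0, 0, 1, 1])):
        mem, regs, idx = {"x": 0, "y": 0}, {}, [0, 0]
        for t in order:
            op, loc, arg = (T0, T1)[t][idx[t]]
            idx[t] += 1
            if op == "st":
                mem[loc] = arg
            else:
                regs[arg] = mem[loc]
        results.add((regs["r1"], regs["r2"]))
    return results

print(outcomes())                    # (1, 0) is absent under SC
assert (1, 0) not in outcomes()
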
@Article{Nalli:2017:APM,
author = "Sanketh Nalli and Swapnil Haria and Mark D. Hill and
Michael M. Swift and Haris Volos and Kimberly Keeton",
title = "An Analysis of Persistent Memory Use with {WHISPER}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "135--148",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037730",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging non-volatile memory (NVM) technologies
promise durability with read and write latencies
comparable to volatile memory (DRAM). We define
Persistent Memory (PM) as NVM accessed with byte
addressability at low latency via normal memory
instructions. Persistent-memory applications ensure the
consistency of persistent data by inserting ordering
points between writes to PM, allowing the construction
of higher-level transaction mechanisms. An epoch is a
set of writes to PM between ordering points. To put
systems research in PM on a firmer footing, we
developed and analyzed a PM benchmark suite called
WHISPER (Wisconsin-HP Labs Suite for Persistence) that
comprises ten PM applications we gathered to cover all
current interfaces to PM. A quantitative analysis
reveals several insights: (a) only 4\% of writes in
PM-aware applications are to PM and the rest are to
volatile memory, (b) software transactions are often
implemented with 5 to 50 ordering points, (c) 75\% of
epochs update exactly one 64B cache line, (d) 80\% of
epochs from the same thread depend on previous epochs
from the same thread, while few epochs depend on epochs
from other threads. Based on our analysis, we propose
the Hands-off Persistence System (HOPS) to track
updates to PM in hardware. Current hardware design
requires applications to force data to PM as each epoch
ends. HOPS provides high-level ISA primitives for
applications to express durability and ordering
constraints separately and enforces them automatically,
while achieving 24.3\% better performance over current
approaches to persistence.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Zhang:2017:PPD,
author = "Tong Zhang and Changhee Jung and Dongyoon Lee",
title = "{ProRace}: Practical Data Race Detection for
Production Use",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "149--162",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037708",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents ProRace, a dynamic data race
detector practical for production runs. It is
lightweight, but still offers high race detection
capability. To track memory accesses, ProRace leverages
instruction sampling using the performance monitoring
unit (PMU) in commodity processors. Our PMU driver
enables ProRace to sample more memory accesses at a
lower cost compared to the state-of-the-art Linux
driver. Moreover, ProRace uses PMU-provided execution
contexts including register states and program path,
and reconstructs unsampled memory accesses offline.
This technique allows ProRace to overcome inherent
limitations of sampling and improve the detection
coverage by performing data race detection on the trace
with not only sampled but also reconstructed memory
accesses. Experiments using racy production software,
including apache and mysql, show that, with a
reasonable offline cost, ProRace incurs only 2.6\%
overhead at runtime and a 27.5\% detection probability
at a sampling period of 10,000.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Olson:2017:CGM,
author = "Lena E. Olson and Mark D. Hill and David A. Wood",
title = "Crossing Guard: Mediating Host-Accelerator Coherence
Interactions",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "163--176",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037715",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Specialized hardware accelerators have performance and
energy-efficiency advantages over general-purpose
processors. To fully realize these benefits and aid
programmability, accelerators may share a physical and
virtual address space and full cache coherence with the
host system. However, allowing accelerators ---
particularly those designed by third parties --- to
directly communicate with host coherence protocols
poses several problems. Host coherence protocols are
complex, vary between companies, and may be
proprietary, increasing burden on accelerator
designers. Bugs in the accelerator implementation may
cause crashes and other serious consequences to the
host system. We propose Crossing Guard, a coherence
interface between the host coherence system and
accelerators. The Crossing Guard interface provides the
accelerator designer with a standardized set of
coherence messages that are simple enough to aid in
design of bug-free coherent caches. At the same time,
they are sufficiently complex to allow customized and
optimized accelerator caches with performance
comparable to using the host protocol. The Crossing
Guard hardware is implemented as part of the trusted
host, and provides complete safety to the host
coherence system, even in the presence of a
pathologically buggy accelerator cache.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{McMahan:2017:ASF,
author = "Joseph McMahan and Michael Christensen and Lawton
Nichols and Jared Roesch and Sung-Yee Guo and Ben
Hardekopf and Timothy Sherwood",
title = "An Architecture Supporting Formal and Compositional
Binary Analysis",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "177--191",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037733",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Building a trustworthy life-critical embedded system
requires deep reasoning about the potential effects
that sequences of machine instructions can have on full
system operation. Rather than trying to analyze
complete binaries and the countless ways their
instructions can interact with one another --- memory,
side effects, control registers, implicit state, etc.
--- we explore a new approach. We propose an
architecture controlled by a thin computational layer
designed to tightly correspond with the lambda
calculus, drawing on principles of functional
programming to bring the assembly much closer to myriad
reasoning frameworks, such as the Coq proof assistant.
This approach allows assembly-level verified versions
of critical code to operate safely in tandem with
arbitrary code, including imperative and unverified
system components, without the need for large
supporting trusted computing bases. We demonstrate that
this computational layer can be built in such a way as
to simultaneously provide full programmability and
compact, precise, and complete semantics, while still
using hardware resources comparable to normal embedded
systems. To demonstrate the practicality of this
approach, our FPGA-implemented prototype runs an
embedded medical application which monitors and treats
life-threatening arrhythmias. Though the system
integrates untrusted and imperative components, our
architecture allows for the formal verification of
multiple properties of the end-to-end system, including
a proof of correctness of the assembly-level
implementation of the core algorithm, the integrity of
trusted data via a non-interference proof, and a
guarantee that our prototype meets critical timing
requirements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Hsiao:2017:ASI,
author = "Chun-Hung Hsiao and Satish Narayanasamy and Essam
Muhammad Idris Khan and Cristiano L. Pereira and Gilles
A. Pokam",
title = "{AsyncClock}: Scalable Inference of Asynchronous Event
Causality",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "193--205",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037712",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract =     "The asynchronous programming model is commonly used in
mobile systems and Web 2.0 environments. Asynchronous
race detectors use algorithms that are an order of
magnitude less efficient in time and space than
conventional data race detectors. We solve this problem
by identifying and addressing two important problems in
reasoning about causality between asynchronous events.
Unlike conventional signal-wait operations,
establishing causal order between two asynchronous
events is fundamentally more challenging as there is no
common handle they operate on. We propose a new
primitive named AsyncClock that addresses this problem
by explicitly tracking causally preceding events, and
show that AsyncClock can handle a wide variety of
asynchronous causality models. We also address the
important scalability problem of efficiently
identifying heirless events whose metadata can be
reclaimed. We built the first single-pass,
non-graph-based Android race detector using our
algorithm and applied it to find errors in 20 popular
applications. Our tool incurs about 6x performance
overhead, several times less than that of the
state-of-the-art solution. It also scales well with
the execution length. We used our tool to find 147
previously unknown harmful races.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
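
The AsyncClock entry above tracks causally preceding events explicitly
because, unlike signal-wait pairs, two asynchronous events share no
common handle. A minimal Python sketch of that idea with an invented
post/races API: two events race when neither is among the other's
causal ancestors.

class Causality:
    def __init__(self):
        self.pred = {}                       # event -> all causal ancestors

    def post(self, event, after=()):
        anc = set(after)
        for a in after:                      # transitive closure on post
            anc |= self.pred[a]
        self.pred[event] = anc

    def happens_before(self, e1, e2):
        return e1 in self.pred[e2]

    def races(self, e1, e2):
        return not (self.happens_before(e1, e2)
                    or self.happens_before(e2, e1))

c = Causality()
c.post("init")
c.post("cb_a", after=["init"])
c.post("cb_b", after=["init"])      # sibling callback, unordered with cb_a
c.post("done", after=["cb_a", "cb_b"])
print(c.races("cb_a", "cb_b"))      # True: concurrent callbacks
print(c.races("init", "done"))      # False: ordered transitively
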
@Article{Calciu:2017:BBC,
author = "Irina Calciu and Siddhartha Sen and Mahesh
Balakrishnan and Marcos K. Aguilera",
title = "Black-box Concurrent Data Structures for {NUMA}
Architectures",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "207--221",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037721",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "High-performance servers are Non-Uniform Memory Access
(NUMA) machines. To fully leverage these machines,
programmers need efficient concurrent data structures
that are aware of the NUMA performance artifacts. We
propose Node Replication (NR), a black-box approach to
obtaining such data structures. NR takes an arbitrary
sequential data structure and automatically transforms
it into a NUMA-aware concurrent data structure
satisfying linearizability. Using NR requires no
expertise in concurrent data structure design, and the
result is free of concurrency bugs. NR draws ideas from
two disciplines: shared-memory algorithms and
distributed systems. Briefly, NR implements a
NUMA-aware shared log, and then uses the log to
replicate data structures consistently across NUMA
nodes. NR is best suited for contended data structures,
where it can outperform lock-free algorithms by 3.1x,
and lock-based solutions by 30x. To show the benefits
of NR to a real application, we apply NR to the data
structures of Redis, an in-memory storage system. The
result outperforms other methods by up to 14x. The cost
of NR is additional memory for its log and replicas.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
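
The Node Replication entry above turns a sequential data structure into
a NUMA-aware concurrent one by keeping a replica per NUMA node and a
shared operation log. A minimal single-threaded Python sketch of the
log/replica split, omitting NR's flat combining and NUMA-aware locking.

class Replica:
    def __init__(self, log):
        self.log, self.applied, self.state = log, 0, {}  # sequential dict

    def _sync(self):
        while self.applied < len(self.log):              # replay log suffix
            key, val = self.log[self.applied]
            self.state[key] = val
            self.applied += 1

    def write(self, key, val):
        self.log.append((key, val))                      # publish update
        self._sync()

    def read(self, key):
        self._sync()                                     # catch up first
        return self.state.get(key)

log = []
node0, node1 = Replica(log), Replica(log)
node0.write("k", 1)          # applied on node0, logged for everyone
print(node1.read("k"))       # -> 1, replayed from the shared log
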
@Article{Vora:2017:CCR,
author = "Keval Vora and Chen Tian and Rajiv Gupta and Ziang
Hu",
title = "{CoRAL}: Confined Recovery in Distributed Asynchronous
Graph Processing",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "223--236",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037747",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Existing distributed asynchronous graph processing
systems employ checkpointing to capture globally
consistent snapshots and roll back all machines to the
most recent checkpoint to recover from machine failures. In
this paper we argue that recovery in distributed
asynchronous graph processing does not require the
entire execution state to be rolled back to a globally
consistent state due to the relaxed asynchronous
execution semantics. We define the properties required
in the recovered state for it to be usable for correct
asynchronous processing and develop CoRAL, a
lightweight checkpointing and recovery algorithm.
First, this algorithm carries out confined recovery
that only rolls back graph execution states of the
failed machines to effect recovery. Second, it relies
upon lightweight checkpoints that capture locally
consistent snapshots with a reduced peak network
bandwidth requirement. Our experiments using real-world
graphs show that our technique recovers from failures
and finishes processing 1.5x to 3.2x faster compared to
the traditional asynchronous checkpointing and recovery
mechanism when failures impact 1 to 6 machines of a 16
machine cluster. Moreover, capturing locally consistent
snapshots significantly reduces intermittent high peak
bandwidth usage required to save the snapshots --- the
average reduction in 99th percentile bandwidth ranges
from 22\% to 51\% while 1 to 6 snapshot replicas are
being maintained.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Vora:2017:KFA,
author = "Keval Vora and Rajiv Gupta and Guoqing Xu",
title = "{KickStarter}: Fast and Accurate Computations on
Streaming Graphs via Trimmed Approximations",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "237--251",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037748",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Continuous processing of a streaming graph maintains
an approximate result of the iterative computation on a
recent version of the graph. Upon a user query, the
accurate result on the current graph can be quickly
computed by feeding the approximate results to the
iterative computation --- a form of incremental
computation that corrects the (small amount of) error
in the approximate result. Despite the effectiveness of
this approach in processing growing graphs, it is
generally not applicable when edge deletions are
present --- existing approximations can lead to either
incorrect results (e.g., monotonic computations
terminate at an incorrect minimum/maximum) or poor
performance (e.g., with approximations, convergence
takes longer than performing the computation from
scratch). This paper presents KickStarter, a runtime
technique that can trim the approximate values for a
subset of vertices impacted by the deleted edges. The
trimmed approximation is both safe and profitable,
enabling the computation to produce correct results and
converge quickly. KickStarter works for a class of
monotonic graph algorithms and can be readily
incorporated in any existing streaming graph system.
Our experiments with four streaming algorithms on five
large graphs demonstrate that trimming not only
produces correct results but also accelerates these
algorithms by 8.5--23.7x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
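
The KickStarter entry above trims approximate values that were derived
through a deleted edge so a monotonic computation can restart safely.
A minimal Python sketch on shortest hop counts, with each vertex's
dependence tracked as a parent pointer; the graph and the quadratic
propagation loop are deliberately naive stand-ins.

INF = float("inf")

def propagate(adj, dist, parent):
    changed = True
    while changed:                       # simple relaxation to fixpoint
        changed = False
        for v in adj:
            for u in adj:
                if v in adj[u] and dist[u] + 1 < dist[v]:
                    dist[v], parent[v] = dist[u] + 1, u
                    changed = True

def delete_edge(adj, dist, parent, u, v):
    adj[u].discard(v)
    if parent.get(v) != u:
        return                           # value did not depend on (u, v)
    stale = {v}
    while True:                          # trim everything derived from v
        more = {w for w in parent if parent[w] in stale and w not in stale}
        if not more:
            break
        stale |= more
    for w in stale:
        dist[w], parent[w] = INF, None
    propagate(adj, dist, parent)         # safe restart from trimmed state

adj = {"s": {"a", "c"}, "a": {"b"}, "c": {"b"}, "b": set()}
dist = {v: INF for v in adj}
dist["s"] = 0
parent = {}
propagate(adj, dist, parent)
delete_edge(adj, dist, parent, "a", "b")   # b must now be derived via c
print(dist["b"])                           # -> 2, recomputed, not stale
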
@Article{Powers:2017:BBG,
author = "Bobby Powers and John Vilk and Emery D. Berger",
title = "{Browsix}: Bridging the Gap Between {Unix} and the
Browser",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "253--266",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037727",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "Applications written to run on conventional operating
systems typically depend on OS abstractions like
processes, pipes, signals, sockets, and a shared file
system. Porting these applications to the web currently
requires extensive rewriting or hosting significant
portions of code server-side because browsers present a
nontraditional runtime environment that lacks OS
functionality. This paper presents Browsix, a framework
that bridges the considerable gap between conventional
operating systems and the browser, enabling unmodified
programs expecting a Unix-like environment to run
directly in the browser. Browsix comprises two core
parts: (1) a JavaScript-only system that makes core
Unix features (including pipes, concurrent processes,
signals, sockets, and a shared file system) available
to web applications; and (2) extended JavaScript
runtimes for C, C++, Go, and Node.js that support
running programs written in these languages as
processes in the browser. Browsix supports running a
POSIX shell, making it straightforward to connect
applications together via pipes. We illustrate
Browsix's capabilities via case studies that
demonstrate how it eases porting legacy applications to
the browser and enables new functionality. We
demonstrate a Browsix-enabled LaTeX editor that
operates by executing unmodified versions of pdfLaTeX
and BibTeX. This browser-only LaTeX editor can render
documents in seconds, making it fast enough to be
practical. We further demonstrate how Browsix lets us
port a client-server application to run entirely in the
browser for disconnected operation. Creating these
applications required less than 50 lines of glue code
and no code modifications, demonstrating how easily
Browsix can be used to build sophisticated web
applications from existing parts without
modification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Rajbhandari:2017:OCM,
author = "Samyam Rajbhandari and Yuxiong He and Olatunji Ruwase
and Michael Carbin and Trishul Chilimbi",
title = "Optimizing {CNNs} on Multicores for Scalability,
Performance and Goodput",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "267--280",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037745",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Convolutional Neural Networks (CNN) are a class of
Artificial Neural Networks (ANN) that are highly
efficient at the pattern recognition tasks that
underlie difficult AI problems in a variety of domains,
such as speech recognition, object recognition, and
natural language processing. CNNs are, however,
computationally intensive to train. This paper presents
the first characterization of the performance
optimization opportunities for training CNNs on CPUs.
Our characterization includes insights based on the
structure of the network itself (i.e., intrinsic
arithmetic intensity of the convolution and its
scalability under parallelism) as well as dynamic
properties of its execution (i.e., sparsity of the
computation). Given this characterization, we present
an automatic framework called spg-CNN for optimizing
CNN training on CPUs. It comprises a computation
scheduler for efficient parallel execution, and two
code generators: one that optimizes for sparsity, and
the other that optimizes for spatial reuse in
convolutions. We evaluate spg-CNN using convolutions
from a variety of real world benchmarks, and show that
spg-CNN can train CNNs faster than state-of-the-art
approaches by an order of magnitude.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Sundararajah:2017:LTN,
author = "Kirshanthan Sundararajah and Laith Sakka and Milind
Kulkarni",
title = "Locality Transformations for Nested Recursive
Iteration Spaces",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "281--295",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037720",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There has been a significant amount of effort invested
in designing scheduling transformations such as loop
tiling and loop fusion that rearrange the execution of
dynamic instances of loop nests to place operations
that access the same data close together temporally. In
recent years, there has been interest in designing
similar transformations that operate on recursive
programs, but until now these transformations have only
considered simple scenarios: multiple recursions to be
fused, or a recursion nested inside a simple loop. This
paper develops the first set of scheduling
transformations for nested recursions: recursive
methods that call other recursive methods. These are
the recursive analog to nested loops. We present a
transformation called recursion twisting that
automatically improves locality at all levels of the
memory hierarchy, and show that this transformation can
yield substantial performance improvements across
several benchmarks that exhibit nested recursion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Li:2017:LAC,
author = "Ang Li and Shuaiwen Leon Song and Weifeng Liu and Xu
Liu and Akash Kumar and Henk Corporaal",
title = "Locality-Aware {CTA} Clustering for Modern {GPUs}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "297--311",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037709",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cache is designed to exploit locality; however, the
role of on-chip L1 data caches on modern GPUs is often
awkward. The locality among global memory requests from
different SMs (Streaming Multiprocessors) is
predominantly harvested by the commonly-shared L2 with
long access latency; while the in-core locality, which
is crucial for performance delivery, is handled
explicitly by user-controlled scratchpad memory. In
this work, we disclose another type of data locality
that has been long ignored but with performance
boosting potential --- the inter-CTA locality.
Exploiting such locality is rather challenging due to
unclear hardware feasibility, unknown and inaccessible
underlying CTA scheduler, and small in-core cache
capacity. To address these issues, we first conduct a
thorough empirical exploration on various modern GPUs
and demonstrate that inter-CTA locality can be
harvested, both spatially and temporally, on L1 or
L1/Tex unified cache. Through a further quantification
process, we prove the significance and commonality of
such locality among GPU applications, and discuss
whether such reuse is exploitable. By leveraging these
insights, we propose the concept of CTA-Clustering and
its associated software-based techniques to reshape the
default CTA scheduling in order to group the CTAs with
potential reuse together on the same SM. Our techniques
require no hardware modification and can be directly
deployed on existing GPUs. In addition, we incorporate
these techniques into an integrated framework for
automatic inter-CTA locality optimization. We evaluate
our techniques using a wide range of popular GPU
applications on all modern generations of NVIDIA GPU
architectures. The results show that our proposed
techniques significantly improve cache performance
through reducing L2 cache transactions by 55\%, 65\%,
29\%, 28\% on average for Fermi, Kepler, Maxwell and
Pascal, respectively, leading to an average of 1.46x,
1.48x, 1.45x, 1.41x (up to 3.8x, 3.6x, 3.1x, 3.3x)
performance speedups for applications with
algorithm-related inter-CTA reuse.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Churchill:2017:SLS,
author = "Berkeley Churchill and Rahul Sharma and J. F. Bastien
and Alex Aiken",
title = "Sound Loop Superoptimization for {Google Native
Client}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "313--326",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037754",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software fault isolation (SFI) is an important
technique for the construction of secure operating
systems, web browsers, and other extensible software.
We demonstrate that superoptimization can dramatically
improve the performance of Google Native Client, an SFI
system that ships inside the Google Chrome Browser. Key
to our results are new techniques for superoptimization
of loops: we propose a new architecture for
superoptimization tools that incorporates both a fully
sound verification technique to ensure correctness and
a bounded verification technique to guide the search to
optimized code. In our evaluation we optimize 13 libc
string functions, formally verify the correctness of
the optimizations and report a median and average
speedup of 25\% over the libraries shipped by Google.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Bianchini:2017:IDE,
author = "Ricardo Bianchini",
title = "Improving Datacenter Efficiency",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "327--327",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3046426",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Internet companies can improve datacenter efficiency
and reduce costs by minimizing resource waste while
avoiding (or limiting) performance degradation. In this
talk, I will first overview a few of the
efficiency-related efforts we are undertaking at
Microsoft, including leveraging workload history to
improve resource management. I will then discuss some
lessons from deploying these efforts in production and
how they relate to academic research.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Liu:2017:DBD,
author = "Mengxing Liu and Mingxing Zhang and Kang Chen and
Xuehai Qian and Yongwei Wu and Weimin Zheng and Jinglei
Ren",
title = "{DudeTM}: Building Durable Transactions with
Decoupling for Persistent Memory",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "329--343",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037714",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging non-volatile memory (NVM) offers
non-volatility, byte-addressability and fast access at
the same time. To make the best use of these
properties, it has been shown by empirical evidence
that programs should access NVM directly through CPU
load and store instructions, so that the overhead of a
traditional file system or database can be avoided.
Thus, durable transactions become a common choice of
applications for accessing persistent memory data in a
crash consistent manner. However, existing durable
transaction systems employ either undo logging, which
requires a fence for every memory write, or redo
logging, which requires intercepting all memory reads
within transactions. This paper presents DUDETM, a
crash-consistent durable transaction system that avoids
the drawbacks of both undo logging and redo logging.
DUDETM uses shadow DRAM to decouple the execution of a
durable transaction into three fully asynchronous
steps. The advantage is that only minimal fences and no
memory read instrumentation are required. This design
also enables an out-of-the-box transactional memory
(TM) to be used as an independent component in our
system. The evaluation results show that DUDETM adds
durability to a TM system with only 7.4--24.6\%
throughput degradation. Compared to the existing
durable transaction systems, DUDETM provides 1.7x
to 4.4x higher throughput. Moreover, DUDETM can be
implemented with existing hardware TMs with minor
hardware modifications, leading to a further 1.7x
speedup.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
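
The DudeTM entry above decouples a durable transaction into three
asynchronous steps: execute in shadow DRAM inside an ordinary TM,
persist the resulting redo log, then reproduce the writes in NVM. A
minimal Python sketch of that pipeline, with dictionaries and a queue
standing in for shadow DRAM, the persistent log, and NVM; everything
here is illustrative.

from collections import deque

shadow_dram, nvm = {}, {}
persist_queue = deque()                  # stand-in for the persisted log

def run_transaction(writes):
    redo = []
    for key, val in writes.items():      # step 1: execute in shadow DRAM
        shadow_dram[key] = val
        redo.append((key, val))
    persist_queue.append(redo)           # step 2: persist the redo log

def reproduce():
    while persist_queue:                 # step 3: drain the log into NVM
        for key, val in persist_queue.popleft():
            nvm[key] = val

run_transaction({"a": 1, "b": 2})
run_transaction({"a": 3})
reproduce()                              # may lag arbitrarily far behind
print(nvm)                               # {'a': 3, 'b': 2}
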
@Article{Klimovic:2017:RRF,
author = "Ana Klimovic and Heiner Litz and Christos Kozyrakis",
title = "{ReFlex}: Remote Flash $ \approx $ Local Flash",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "345--359",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037732",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Remote access to NVMe Flash enables flexible scaling
and high utilization of Flash capacity and IOPS within
a datacenter. However, existing systems for remote
Flash access either introduce significant performance
overheads or fail to isolate the multiple remote
clients sharing each Flash device. We present ReFlex, a
software-based system for remote Flash access that
provides nearly identical performance to accessing
local Flash. ReFlex uses a dataplane kernel to closely
integrate networking and storage processing to achieve
low latency and high throughput at low resource
requirements. Specifically, ReFlex can serve up to 850K
IOPS per core over TCP/IP networking, while adding 21 $\mu$s
over direct access to local Flash. ReFlex uses a QoS
scheduler that can enforce tail latency and throughput
service-level objectives (SLOs) for thousands of remote
clients. We show that ReFlex allows applications to use
remote Flash while maintaining their original
performance with local Flash.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Jevdjic:2017:ASC,
author = "Djordje Jevdjic and Karin Strauss and Luis Ceze and
Henrique S. Malvar",
title = "Approximate Storage of Compressed and Encrypted
Videos",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "361--373",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037718",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The popularization of video capture devices has
created strong storage demand for encoded videos.
Approximate storage can ease this demand by enabling
denser storage at the expense of occasional errors.
Unfortunately, even minor storage errors, such as bit
flips, can result in major visual damage in encoded
videos. Similarly, video encryption, widely employed
for privacy and digital rights management, may create
long dependencies between bits that show little or no
tolerance to storage errors. In this paper we propose
VideoApp, a novel and efficient methodology to compute
bit-level reliability requirements for encoded videos
by tracking visual and metadata dependencies within
encoded bitstreams. We further show how VideoApp can be
used to trade video quality for storage density in an
optimal way. We integrate our methodology into a
popular H.264 encoder to partition an encoded video
stream into multiple streams that can receive different
levels of error correction according to their
reliability needs. When applied to a dense and highly
error-prone multi-level cell storage substrate, our
variable error correction mechanism reduces the error
correction overhead by half under the most
error-intolerant encoder settings, achieving
quality/density points that neither compression nor
approximation can achieve alone. Finally, we define the
basic invariants needed to support encrypted
approximate video storage. We present an analysis of
block cipher modes of operation, showing that some are
fully compatible with approximation, enabling
approximate and secure video storage systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Elyasi:2017:EIR,
author = "Nima Elyasi and Mohammad Arjomand and Anand
Sivasubramaniam and Mahmut T. Kandemir and Chita R. Das
and Myoungsoo Jung",
title = "Exploiting Intra-Request Slack to Improve {SSD}
Performance",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "375--388",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037728",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With Solid State Disks (SSDs) offering high degrees of
parallelism, SSD controllers place data and direct
requests to exploit the maximum offered hardware
parallelism. In the quest to maximize parallelism and
utilization, sub-requests of a request that are
directed to different flash chips by the scheduler can
experience differential wait times since their
individual queues are not coordinated and load balanced
at all times. Since the macro request is considered
complete only when its last sub-request completes, some
of its sub-requests that complete earlier must wait
for this last sub-request. This paper
opens the door to a new class of schedulers to leverage
such slack between sub-requests in order to improve
response times. Specifically, the paper presents the
design and implementation of a slack-enabled
re-ordering scheduler, called Slacker, for sub-requests
issued to each flash chip. Layered under a modern SSD
request scheduler, Slacker estimates the slack of each
incoming sub-request to a flash chip and allows them to
jump ahead of existing sub-requests with sufficient
slack so as to not detrimentally impact their response
times. Slacker is simple to implement and imposes only
marginal additions to the hardware. Using a spectrum of
21 workloads with diverse read-write characteristics,
we show that Slacker provides as much as 19.5\%, 13\%
and 14.5\% improvement in response times, with average
improvements of 12\%, 6.5\% and 8.5\%, for
write-intensive, read-intensive and read-write balanced
workloads, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
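
The Slacker entry above reorders per-chip sub-requests by their slack.
A minimal Python sketch of the insertion rule, in invented time units:
a newcomer overtakes queued sub-requests whose slack covers its service
time, and each overtaken entry gives up that much slack so its own
response time is not hurt.

def insert_with_slack(queue, new_req, service_time):
    """queue: list of dicts with 'name' and 'slack'; head served first."""
    pos = len(queue)
    while pos > 0 and queue[pos - 1]["slack"] >= service_time:
        pos -= 1                          # safe to overtake this entry
    for entry in queue[pos:]:             # overtaken entries lose slack
        entry["slack"] -= service_time
    queue.insert(pos, new_req)

queue = [{"name": "sub_a", "slack": 0.2}, {"name": "sub_b", "slack": 5.0}]
insert_with_slack(queue, {"name": "urgent", "slack": 0.0}, service_time=1.0)
print([e["name"] for e in queue])   # ['sub_a', 'urgent', 'sub_b']
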
@Article{Wang:2017:GSM,
author = "Kai Wang and Aftab Hussain and Zhiqiang Zuo and
Guoqing Xu and Ardalan Amiri Sani",
title = "{Graspan}: a Single-machine Disk-based Graph System
for Interprocedural Static Analyses of Large-scale
Systems Code",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "389--404",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037744",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "There is more than a decade-long history of using
static analysis to find bugs in systems such as Linux.
Most of the existing static analyses developed for
these systems are simple checkers that find bugs based
on pattern matching. Despite the presence of many
sophisticated interprocedural analyses, few of them
have been employed to improve checkers for systems code
due to their complex implementations and poor
scalability. In this paper, we revisit the scalability
problem of interprocedural static analysis from a ``Big
Data'' perspective. That is, we turn sophisticated code
analysis into Big Data analytics and leverage novel
data processing techniques to solve this traditional
programming language problem. We develop Graspan, a
disk-based parallel graph system that uses an edge-pair
centric computation model to compute dynamic transitive
closures on very large program graphs. We implement
context-sensitive pointer/alias and dataflow analyses
on Graspan. An evaluation of these analyses on large
codebases such as Linux shows that their Graspan
implementations scale to millions of lines of code and
are much simpler than their original implementations.
Moreover, we show that these analyses can be used to
augment the existing checkers; these augmented checkers
uncovered 132 new NULL pointer bugs and 1308
unnecessary NULL tests in Linux 4.4.0-rc5, PostgreSQL
8.3.9, and Apache httpd 2.2.18.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
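
The Graspan entry above computes dynamic transitive closures over
program graphs by repeatedly joining pairs of edges under grammar
rules. A minimal in-memory Python sketch of that worklist computation
with a single toy rule; Graspan's contribution is performing this out
of core, in parallel, on graphs far too large for memory.

def transitive_closure(edges, rules):
    """edges: set of (src, label, dst); rules: {(l1, l2): l_new}, meaning
    an l1-edge followed by an l2-edge implies an l_new-edge."""
    edges, worklist = set(edges), list(edges)
    while worklist:
        e = worklist.pop()
        for f in list(edges):             # join on the shared vertex
            for (a, l1, b), (x, l2, c) in ((e, f), (f, e)):
                if b == x and (l1, l2) in rules:
                    new = (a, rules[(l1, l2)], c)
                    if new not in edges:
                        edges.add(new)
                        worklist.append(new)
    return edges

# Toy stand-in for a pointer-analysis grammar.
rules = {("assign", "pointsTo"): "pointsTo"}
edges = {("p", "assign", "q"), ("q", "assign", "r"), ("r", "pointsTo", "o")}
closed = transitive_closure(edges, rules)
print(("p", "pointsTo", "o") in closed)   # True, via two assign hops
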
@Article{Ren:2017:SDH,
author = "Ao Ren and Zhe Li and Caiwen Ding and Qinru Qiu and
Yanzhi Wang and Ji Li and Xuehai Qian and Bo Yuan",
title = "{SC-DCNN}: Highly-Scalable Deep Convolutional Neural
Network using Stochastic Computing",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "405--418",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037746",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the recent advance of wearable devices and
Internet of Things (IoTs), it becomes attractive to
implement the Deep Convolutional Neural Networks
(DCNNs) in embedded and portable systems. Currently,
executing the software-based DCNNs requires
high-performance servers, restricting the widespread
deployment on embedded and mobile IoT devices. To
overcome this obstacle, considerable research efforts
have been made to develop highly-parallel and
specialized DCNN accelerators using GPGPUs, FPGAs or
ASICs. Stochastic Computing (SC), which uses a
bit-stream to represent a number within [-1, 1] by
counting the number of ones in the bit-stream, has high
potential for implementing DCNNs with high scalability
and ultra-low hardware footprint. Since multiplications
and additions can be calculated using AND gates and
multiplexers in SC, significant reductions in power
(energy) and hardware footprint can be achieved
compared to the conventional binary arithmetic
implementations. The tremendous savings in power
(energy) and hardware resources allow immense design
space for enhancing scalability and robustness for
hardware DCNNs. This paper presents SC-DCNN, the first
comprehensive design and optimization framework of
SC-based DCNNs, using a bottom-up approach. We first
present the designs of function blocks that perform the
basic operations in DCNN, including inner product,
pooling, and activation function. Then we propose four
designs of feature extraction blocks, which are in
charge of extracting features from input feature maps,
by connecting different basic function blocks with
joint optimization. Moreover, the efficient weight
storage methods are proposed to reduce the area and
power (energy) consumption. Putting all together, with
feature extraction blocks carefully selected, SC-DCNN
is holistically optimized to minimize area and power
(energy) consumption while maintaining high network
accuracy. Experimental results demonstrate that the
LeNet5 implemented in SC-DCNN consumes only 17 mm$^2$
area and 1.53 W power, achieves throughput of 781250
images/s, area efficiency of 45946 images/s/mm$^2$,
and energy efficiency of 510734 images/J.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
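The stochastic-computing encoding this abstract relies on is easy to demonstrate: a number becomes a random bit-stream, multiplication becomes a single AND gate, and scaled addition becomes a multiplexer. A toy sketch in the simpler unipolar format on [0, 1]; the paper's bipolar format on [-1, 1] would use an XNOR gate for multiplication instead, and the stream length and seed here are arbitrary:

import random

def to_stream(x, n, rng):
    # Unipolar SC: encode x in [0, 1] as a bit-stream whose expected
    # fraction of ones is x. Streams must be mutually independent.
    return [1 if rng.random() < x else 0 for _ in range(n)]

def from_stream(bits):
    return sum(bits) / len(bits)

rng, n = random.Random(42), 4096
sa, sb = to_stream(0.5, n, rng), to_stream(0.6, n, rng)
prod = [a & b for a, b in zip(sa, sb)]                 # one AND gate: a*b
sel = to_stream(0.5, n, rng)
add = [a if s else b for a, b, s in zip(sa, sb, sel)]  # one MUX: (a+b)/2
print(from_stream(prod), from_stream(add))             # approx. 0.30 and 0.55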
@Article{Ajay:2017:GIL,
author = "Jerry Ajay and Chen Song and Aditya Singh Rathore and
Chi Zhou and Wenyao Xu",
title = "{$3$DGates}: an Instruction-Level Energy Analysis and
Optimization of {$3$D} Printers",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "419--433",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037752",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As the next-generation manufacturing driven force, 3D
printing technology is having a transformative effect
on various industrial domains and has been widely
applied in a broad spectrum of applications. It also
progresses towards other versatile fields with portable
battery-powered 3D printers working on a limited energy
budget. While reducing manufacturing energy is an
essential challenge in industrial sustainability and
national economics, this growing trend motivates us to
explore the energy consumption of the 3D printer for
the purpose of energy efficiency. To this end, we
perform an in-depth analysis of energy consumption in
commercial, off-the-shelf 3D printers from an
instruction-level perspective. We build an
instruction-level energy model and an energy profiler
to analyze the energy cost during the fabrication
process. From the insights obtained by the energy
profiler, we propose and implement a cross-layer energy
optimization solution, called 3DGates, which spans the
instruction-set, the compiler and the firmware. We
evaluate 3DGates over 338 benchmarks on a 3D printer
and achieve an overall energy reduction of 25\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
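An instruction-level energy model of the kind the 3DGates abstract describes sums a per-opcode cost over the instruction stream, optionally with motion-dependent terms. A hypothetical sketch; the G-code-like opcodes and every cost constant are invented for illustration, not measurements from the paper:

# Per-instruction base costs in joules; all numbers are placeholders.
COST_J = {'G0': 0.8, 'G1': 1.5, 'M104': 2.0}

def program_energy(program):
    # program: list of (opcode, travel distance in mm)
    total = 0.0
    for opcode, distance_mm in program:
        total += COST_J[opcode] + 0.02 * distance_mm  # simple motion term
    return total

print(program_energy([('G0', 30.0), ('G1', 12.5), ('M104', 0.0)]))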
@Article{Cox:2017:EAT,
author = "Guilherme Cox and Abhishek Bhattacharjee",
title = "Efficient Address Translation for Architectures with
Multiple Page Sizes",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "435--448",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037704",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Processors and operating systems (OSes) support
multiple memory page sizes. Superpages increase
Translation Lookaside Buffer (TLB) hits, while small
pages provide fine-grained memory protection. Ideally,
TLBs should perform well for any distribution of page
sizes. In reality, set-associative TLBs --- used
frequently for their energy efficiency compared to
fully-associative TLBs --- cannot (easily) support
multiple page sizes concurrently. Instead, commercial
systems typically implement separate set-associative
TLBs for different page sizes. This means that when
superpages are allocated aggressively, TLB misses may,
counterintuitively, increase even if entries for small
pages remain unused (and vice versa). We invent MIX
TLBs, energy-frugal set-associative structures that
concurrently support all page sizes by exploiting
superpage allocation patterns. MIX TLBs boost the
performance (often by 10-30\%) of big-memory
applications on native CPUs, virtualized CPUs, and
GPUs. MIX TLBs are simple and require no OS or program
changes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
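The difficulty MIX TLBs target is that a set-associative TLB's set index is drawn from page-number bits, and which bits those are depends on a page size that is unknown at lookup time, so a conventional design must keep separate structures or probe once per size. A toy model of that probe-per-size baseline (not the MIX TLB design itself); the geometry and page sizes are assumptions:

from collections import defaultdict

NUM_SETS = 64
PAGE_BITS = (12, 21)                        # 4 KB and 2 MB pages

def set_index(vaddr, page_bits):
    return (vaddr >> page_bits) % NUM_SETS

def insert(tlb, vaddr, pfn, page_bits):
    vpn = vaddr >> page_bits
    tlb[set_index(vaddr, page_bits)].append((vpn, page_bits, pfn))

def lookup(tlb, vaddr):
    for page_bits in PAGE_BITS:             # one probe per supported size
        for vpn, bits, pfn in tlb[set_index(vaddr, page_bits)]:
            if bits == page_bits and vpn == vaddr >> page_bits:
                return pfn
    return None                             # TLB miss

tlb = defaultdict(list)
insert(tlb, 0x7F0000123000, 0x8888, 12)
print(hex(lookup(tlb, 0x7F0000123ABC)))     # hit in the 4 KB probe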
@Article{Lesokhin:2017:PFS,
author = "Ilya Lesokhin and Haggai Eran and Shachar Raindel and
Guy Shapiro and Sagi Grimberg and Liran Liss and Muli
Ben-Yehuda and Nadav Amit and Dan Tsafrir",
title = "Page Fault Support for Network Controllers",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "449--466",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037710",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Direct network I/O allows network controllers (NICs)
to expose multiple instances of themselves, to be used
by untrusted software without a trusted intermediary.
Direct I/O thus frees researchers from legacy software,
fueling studies that innovate in multitenant setups.
Such studies, however, overwhelmingly ignore one
serious problem: direct memory accesses (DMAs) of NICs
disallow page faults, forcing systems to either pin
entire address spaces to physical memory and thereby
hinder memory utilization, or resort to APIs that
pin/unpin memory buffers before/after they are DMAed,
which complicates the programming model and hampers
performance. We solve this problem by designing and
implementing page fault support for InfiniBand and
Ethernet NICs. A main challenge we tackle---unique to
NICs---is handling receive DMAs that trigger page
faults, leaving the NIC without memory to store the
incoming data. We demonstrate that our solution
provides all the benefits associated with ``regular''
virtual memory, notably (1) a simpler programming model
that rids users of the need to pin, and (2) the
ability to employ all the canonical memory
optimizations, such as memory overcommitment and
demand-paging based on actual use. We show that, as a
result, benchmark performance improves by up to 1.9x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Hu:2017:TFC,
author = "Yang Hu and Mingcong Song and Tao Li",
title = "Towards {``Full Containerization''} in Containerized
Network Function Virtualization",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "467--481",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037713",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With exploding traffic stuffing existing network
infra-structure, today's telecommunication and cloud
service providers resort to Network Function
Virtualization (NFV) for greater agility and economics.
Pioneer service provider such as AT{\&}T proposes to
adopt container in NFV to achieve shorter Virtualized
Network Function (VNF) provisioning time and better
runtime performance. However, we characterize typical
NFV work-loads on the containers and find that the
performance is unsatisfactory. We observe that the
shared host OS net-work stack is the main bottleneck,
where the traffic flow processing involves a large
amount of intermediate memory buffers and results in
significant last level cache pollution. Existing OS
memory allocation policies fail to exploit the locality
and data sharing information among buffers. In this
paper, we propose NetContainer, a software framework
that achieves fine-grained hardware resource management
for containerized NFV platforms. NetContainer employs a
page coloring scheme guided by cache access overheads
to jointly address inter-flow and intra-flow cache
access overheads. It
maps the memory buffer pages that manifest low cache
access overheads (across a flow or among the flows) to
the same last level cache partition. NetContainer
exploits a footprint theory based method to estimate
the cache access overheads and a Min-Cost Max-Flow
model to guide the memory buffer mappings. We implement
NetContainer in the Linux kernel and extensively
evaluate it with real NFV workloads. Experimental
results show that NetContainer outperforms a conventional
page-coloring-based memory allocator by 48\% in terms
of successful call rate.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
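Classic page coloring, the mechanism NetContainer builds on, follows from the overlap between the LLC set index and the physical frame number: by choosing a page's frame, the OS chooses which slice of the cache the page can occupy. A minimal sketch with an assumed cache geometry:

LINE, SETS, PAGE = 64, 8192, 4096     # assumed LLC geometry
COLORS = (SETS * LINE) // PAGE        # distinct page colors (128 here)

def color_of(phys_addr):
    # Frames of the same color compete for the same 1/COLORS
    # slice of the last-level cache.
    return (phys_addr // PAGE) % COLORS

# An allocator that hands a flow's buffers only frames of one color
# confines that flow's footprint to a single LLC partition.
print(color_of(0x00000), color_of(0x40000))   # colors 0 and 64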
@Article{Wu:2017:FEF,
author = "Bo Wu and Xu Liu and Xiaobo Zhou and Changjun Jiang",
title = "{FLEP}: Enabling Flexible and Efficient Preemption on
{GPUs}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "483--496",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037742",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "GPUs are widely adopted in HPC and cloud computing
platforms to accelerate general-purpose workloads.
However, modern GPUs do not support flexible
preemption, leading to performance and priority
inversion problems in multi-tasking environments. In
this paper, we propose and develop FLEP, the first
software system that enables flexible kernel preemption
and kernel scheduling on commodity GPUs. The FLEP
compilation engine transforms the GPU program into
preemptable forms, which can be interrupted during
execution and yield all or part of the streaming
multi-processors (SMs) in the GPU. The FLEP runtime
engine intercepts all kernel invocations and determines
which kernels should be preempted and how they should
be scheduled. Experimental results on two-kernel
co-runs demonstrate up to 24.2X speedup for
high-priority kernels and up to 27X improvement on
normalized average turnaround time for kernels with the
same priority. FLEP reduces the preemption latency by
up to 41\% compared to yielding the whole GPU when the
waiting kernels only need several SMs. With all the
benefits, FLEP only introduces 2.5\% runtime overhead,
which is substantially lower than the kernel slicing
approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
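The compiler transformation FLEP's abstract describes can be pictured as turning a run-to-completion kernel into a chunked loop that polls a preemption flag between chunks and can resume later from a saved cursor. A minimal Python sketch of that shape; the chunk size and kernel body are placeholders:

import threading

preempt = threading.Event()        # set by the runtime to reclaim the GPU

def preemptable_kernel(work, resume_at=0, chunk=1024):
    i = resume_at
    while i < len(work):
        if preempt.is_set():       # poll between chunks instead of
            return i               # running to completion; resume later
        for j in range(i, min(i + chunk, len(work))):
            work[j] *= 2           # stand-in for the real kernel body
        i += chunk
    return len(work)

data = list(range(10000))
preempt.set()
cursor = preemptable_kernel(data)               # yields immediately: 0
preempt.clear()
print(cursor, preemptable_kernel(data, cursor)) # resumes and finishes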
@Article{Li:2017:SSA,
author = "Kaiwei Li and Jianfei Chen and Wenguang Chen and Jun
Zhu",
title = "{SaberLDA}: Sparsity-Aware Learning of Topic Models on
{GPUs}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "497--509",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037740",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Latent Dirichlet Allocation (LDA) is a popular tool
for analyzing discrete count data such as text and
images. Applications require LDA to handle both large
datasets and a large number of topics. Though
distributed CPU systems have been used, GPU-based
systems have emerged as a promising alternative because
of the high computational power and memory bandwidth of
GPUs. However, existing GPU-based LDA systems cannot
support a large number of topics because they use
algorithms on dense data structures whose time and
space complexity is linear in the number of topics. In
this paper, we propose SaberLDA, a GPU-based LDA system
that implements a sparsity-aware algorithm to achieve
sublinear time complexity and scales well to learn a
large number of topics. To address the challenges
introduced by sparsity, we propose a novel data layout,
a new warp-based sampling kernel, and an efficient
sparse count matrix updating algorithm that improves
locality, makes efficient utilization of GPU warps, and
reduces memory consumption. Experiments show that
SaberLDA can learn from billions-token-scale data with
up to 10,000 topics, which is almost two orders of
magnitude larger than that of the previous GPU-based
systems. With a single GPU card, SaberLDA is able to
learn 10,000 topics from a dataset of billions of
tokens in a few hours, which was previously achievable
only with clusters of tens of machines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Khazraee:2017:MNO,
author = "Moein Khazraee and Lu Zhang and Luis Vega and Michael
Bedford Taylor",
title = "{Moonwalk}: {NRE} Optimization in {ASIC} Clouds",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "511--526",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037749",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cloud services are becoming increasingly globalized
and data-center workloads are expanding exponentially.
GPU and FPGA-based clouds have illustrated improvements
in power and performance by accelerating
compute-intensive workloads. ASIC-based clouds are a
promising way to optimize the Total Cost of Ownership
(TCO) of a given datacenter computation (e.g. YouTube
transcoding) by reducing both energy consumption and
marginal computation cost. The feasibility of an ASIC
Cloud for a particular application is directly gated by
the ability to manage the Non-Recurring Engineering
(NRE) costs of designing and fabricating the ASIC, so
that it is significantly lower (e.g. 2X) than the TCO
of the best available alternative. In this paper, we
show that technology node selection is a major tool for
managing ASIC Cloud NRE, and allows the designer to
trade off an accelerator's excess energy efficiency and
cost performance for lower total cost. We explore NRE
and cross-technology optimization of ASIC Clouds for
four different applications: Bitcoin mining,
YouTube-style video transcoding, Litecoin, and Deep
Learning. We address these challenges and show large
reductions in the NRE, potentially enabling ASIC Clouds
to address a wider variety of datacenter workloads. Our
results suggest that advanced nodes like 16nm will lead
to sub-optimal TCO for many workloads, and that use of
older nodes like 65nm can enable a greater diversity of
ASIC Clouds.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
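The node-selection trade-off in the Moonwalk abstract reduces to a small cost model: an older technology node has lower NRE but higher marginal cost per deployed server, so the best node depends on deployment size. A back-of-envelope sketch with invented numbers:

# node: (NRE in dollars, marginal dollars per server over its lifetime);
# every figure below is made up to illustrate the shape of the trade-off.
nodes = {
    '65nm': ( 5e6, 900.0),
    '28nm': (15e6, 550.0),
    '16nm': (40e6, 450.0),
}

def total_cost(node, n_servers):
    nre, marginal = nodes[node]
    return nre + marginal * n_servers

for n_servers in (1000, 50000):
    best = min(nodes, key=lambda k: total_cost(k, n_servers))
    print(n_servers, best)   # 65nm wins small; 16nm never wins here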
@Article{Park:2017:DRM,
author = "Jason Jong Kyu Park and Yongjun Park and Scott
Mahlke",
title = "Dynamic Resource Management for Efficient Utilization
of Multitasking {GPUs}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "527--540",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037707",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As graphics processing units (GPUs) are broadly
adopted, running multiple applications on a GPU at the
same time is beginning to attract wide attention.
Recent proposals on multitasking GPUs have focused on
either spatial multitasking, which partitions GPU
resource at a streaming multiprocessor (SM)
granularity, or simultaneous multikernel (SMK), which
runs multiple kernels on the same SM. However,
multitasking performance varies heavily depending on
the resource partitions within each scheme, and the
application mixes. In this paper, we propose GPU
Maestro that performs dynamic resource management for
efficient utilization of multitasking GPUs. GPU Maestro
can discover the best performing GPU resource partition
exploiting both spatial multitasking and SMK.
Furthermore, dynamism within a kernel and interference
between the kernels are automatically considered
because GPU Maestro finds the best performing partition
through direct measurements. Evaluations show that GPU
Maestro can improve average system throughput by 20.2\%
and 13.9\% over the baseline spatial multitasking and
SMK, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Zhang:2017:ISC,
author = "Rui Zhang and Natalie Stanley and Christopher Griggs
and Andrew Chi and Cynthia Sturton",
title = "Identifying Security Critical Properties for the
Dynamic Verification of a Processor",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "541--554",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037734",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a methodology for identifying security
critical properties for use in the dynamic verification
of a processor. Such verification has been shown to be
an effective way to prevent exploits of vulnerabilities
in the processor, given a meaningful set of security
properties. We use known processor errata to establish
an initial set of security-critical invariants of the
processor. We then use machine learning to infer an
additional set of invariants that are not tied to any
particular, known vulnerability, yet are critical to
security. We build a tool chain implementing the
approach and evaluate it for the open-source OR1200
RISC processor. We find that our tool can identify 19
(86.4\%) of the 22 manually crafted security-critical
properties from prior work and generates 3 new security
properties not covered in prior work.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Ferraiuolo:2017:VPH,
author = "Andrew Ferraiuolo and Rui Xu and Danfeng Zhang and
Andrew C. Myers and G. Edward Suh",
title = "Verification of a Practical Hardware Security
Architecture Through Static Information Flow Analysis",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "555--568",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037739",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hardware-based mechanisms for software isolation are
becoming increasingly popular, but implementing these
mechanisms correctly has proved difficult, undermining
the root of security. This work introduces an effective
way to formally verify important properties of such
hardware security mechanisms. In our approach, hardware
is developed using a lightweight security-typed
hardware description language (HDL) that performs
static information flow analysis. We show the
practicality of our approach by implementing and
verifying a simplified but realistic multi-core
prototype of the ARM TrustZone architecture. To make
the security-typed HDL expressive enough to verify a
realistic processor, we develop new type system
features. Our experiments suggest that information flow
analysis is efficient, and programmer effort is modest.
We also show that information flow constraints are an
effective way to detect hardware vulnerabilities,
including several found in commercial processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Chisnall:2017:CJS,
author = "David Chisnall and Brooks Davis and Khilan Gudka and
David Brazdil and Alexandre Joannou and Jonathan
Woodruff and A. Theodore Markettos and J. Edward Maste
and Robert Norton and Stacey Son and Michael Roe and
Simon W. Moore and Peter G. Neumann and Ben Laurie and
Robert N. M. Watson",
title = "{CHERI JNI}: Sinking the {Java} Security Model into
the {C}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "569--583",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037725",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Java provides security and robustness by building a
high-level security model atop the foundation of memory
protection. Unfortunately, any native code linked into
a Java program --- including the million lines used to
implement the standard library --- is able to bypass
both the memory protection and the higher-level
policies. We present a hardware-assisted implementation
of the Java native code interface, which extends the
guarantees required for Java's security model to native
code. Our design supports safe direct access to buffers
owned by the JVM, including hardware-enforced read-only
access where appropriate. We also present Java language
syntax to declaratively describe isolated compartments
for native code. We show that it is possible to
preserve the memory safety and isolation requirements
of the Java security model in C code, allowing native
code to run in the same process as Java code with the
same impact on security as running equivalent Java
code. Our approach has a negligible impact on
performance, compared with the existing unsafe native
code interface. We demonstrate a prototype
implementation running on the CHERI microprocessor
synthesized in FPGA.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Ge:2017:GGC,
author = "Xinyang Ge and Weidong Cui and Trent Jaeger",
title = "{GRIFFIN}: Guarding Control Flows Using {Intel}
Processor Trace",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "585--598",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037716",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Researchers are actively exploring techniques to
enforce control-flow integrity (CFI), which restricts
program execution to a predefined set of targets for
each indirect control transfer to prevent code-reuse
attacks. While hardware-assisted CFI enforcement may
have the potential for advantages in performance and
flexibility over software instrumentation, current
hardware-assisted defenses are either incomplete (i.e.,
do not enforce all control transfers) or less efficient
in comparison. We find that the recent introduction of
hardware features to log complete control-flow traces,
such as Intel Processor Trace (PT), provides an
opportunity to explore how efficient and flexible a
hardware-assisted CFI enforcement system may become.
While Intel PT was designed to aid in offline debugging
and failure diagnosis, we explore its effectiveness for
online CFI enforcement over unmodified binaries by
designing a parallelized method for enforcing various
types of CFI policies. We have implemented a prototype
called GRIFFIN in the Linux 4.2 kernel that enables
complete CFI enforcement over a variety of software,
including the Firefox browser and its jitted code. Our
experiments show that GRIFFIN can enforce fine-grained
CFI policies with a shadow stack, as recommended by
researchers at a performance that is comparable to
software-only instrumentation techniques. In addition,
we find that alternative logging approaches yield
significant performance improvements for trace
processing, identifying opportunities for further
hardware assistance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
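The shadow-stack policy GRIFFIN's abstract mentions is simple to state over a decoded control-flow trace: every return must match the most recent unmatched call. A sketch over a simplified trace format (decoding real Intel PT packets is far more involved):

def check_trace(events):
    # events: (kind, addr) pairs; 'call' pushes its return site and
    # 'ret' must pop a matching one, else control flow was hijacked.
    shadow = []
    for kind, addr in events:
        if kind == 'call':
            shadow.append(addr)
        elif kind == 'ret':
            if not shadow or shadow.pop() != addr:
                raise RuntimeError('CFI violation at %#x' % addr)

check_trace([('call', 0x401000), ('call', 0x402000),
             ('ret', 0x402000), ('ret', 0x401000)])   # well-nested: passes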
@Article{Delimitrou:2017:BKW,
author = "Christina Delimitrou and Christos Kozyrakis",
title = "{Bolt}: {I} Know What You Did Last Summer\ldots{} In
The Cloud",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "599--613",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037703",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cloud providers routinely schedule multiple
applications per physical host to increase efficiency.
The resulting interference on shared resources often
leads to performance degradation and, more importantly,
security vulnerabilities. Interference can leak
important information ranging from a service's
placement to confidential data, like private keys. We
present Bolt, a practical system that accurately
detects the type and characteristics of applications
sharing a cloud platform based on the interference an
adversary sees on shared resources. Bolt leverages
online data mining techniques that only require 2-5
seconds for detection. In a multi-user study on EC2,
Bolt correctly identifies the characteristics of 385
out of 436 diverse workloads. Extracting this
information enables a wide spectrum of
previously-impractical cloud attacks, including
denial-of-service (DoS) attacks that increase tail latency by
140x, as well as resource freeing (RFA) and
co-residency attacks. Finally, we show that while
advanced isolation mechanisms, such as cache
partitioning, lower detection accuracy, they are
insufficient to eliminate these vulnerabilities
altogether. To do so, one must either disallow core
sharing, or only allow it between threads of the same
application, leading to significant inefficiencies and
performance penalties.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Kang:2017:NCI,
author = "Yiping Kang and Johann Hauswald and Cao Gao and Austin
Rovinski and Trevor Mudge and Jason Mars and Lingjia
Tang",
title = "Neurosurgeon: Collaborative Intelligence Between the
Cloud and Mobile Edge",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "615--629",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037698",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The computation for today's intelligent personal
assistants such as Apple Siri, Google Now, and
Microsoft Cortana, is performed in the cloud. This
cloud-only approach requires significant amounts of
data to be sent to the cloud over the wireless network
and puts significant computational pressure on the
datacenter. However, as the computational resources in
mobile devices become more powerful and energy
efficient, questions arise as to whether this
cloud-only processing is desirable moving forward, and
what the implications are of pushing some or all of
this compute to the mobile devices at the edge. In this
paper, we examine the status quo approach of cloud-only
processing and investigate computation partitioning
strategies that effectively leverage both the cycles in
the cloud and on the mobile device to achieve low
latency, low energy consumption, and high datacenter
throughput for this class of intelligent applications.
Our study uses 8 intelligent applications spanning
computer vision, speech, and natural language domains,
all employing state-of-the-art Deep Neural Networks
(DNNs) as the core machine learning technique. We find
that given the characteristics of DNN algorithms, a
fine-grained, layer-level computation partitioning
strategy based on the data and computation variations
of each layer within a DNN has significant latency and
energy advantages over the status quo approach. Using
this insight, we design Neurosurgeon, a lightweight
scheduler to automatically partition DNN computation
between mobile devices and datacenters at the
granularity of neural network layers. Neurosurgeon does
not require per-application profiling. It adapts to
various DNN architectures, hardware platforms, wireless
networks, and server load levels, intelligently
partitioning computation for best latency or best
mobile energy. We evaluate Neurosurgeon on a
state-of-the-art mobile development platform and show
that it improves end-to-end latency by 3.1X on average
and up to 40.7X, reduces mobile energy consumption by
59.5\% on average and up to 94.7\%, and improves
datacenter throughput by 1.5X on average and up to
6.7X.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
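The layer-level partitioning decision at the heart of Neurosurgeon can be written as a one-dimensional search: run a prefix of the DNN on the device, ship that layer's output, and run the suffix in the cloud, picking the split that minimizes end-to-end latency. A sketch with invented per-layer numbers:

mobile_ms = [5, 20, 20, 15]   # per-layer latency on the device (assumed)
cloud_ms  = [1,  4,  4,  3]   # per-layer latency in the datacenter
out_kb    = [400, 50, 50, 8]  # each layer's output size
INPUT_KB, UP_MS_PER_KB = 400, 0.2

def latency(k):               # run layers [0, k) locally, the rest remotely
    xfer = (out_kb[k - 1] if k else INPUT_KB) * UP_MS_PER_KB
    return sum(mobile_ms[:k]) + xfer + sum(cloud_ms[k:])

best = min(range(len(mobile_ms) + 1), key=latency)
print(best, latency(best))    # split where the activations shrink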
@Article{Agarwal:2017:TAT,
author = "Neha Agarwal and Thomas F. Wenisch",
title = "{Thermostat}: Application-transparent Page Management
for Two-tiered Main Memory",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "631--644",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037706",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The advent of new memory technologies that are denser
and cheaper than commodity DRAM has renewed interest in
two-tiered main memory schemes. Infrequently accessed
application data can be stored in such memories to
achieve significant memory cost savings. Past research
on two-tiered main memory has assumed a 4KB page size.
However, 2MB huge pages are performance critical in
cloud applications with large memory footprints,
especially in virtualized cloud environments, where
nested paging drastically increases the cost of 4KB
page management. We present Thermostat, an
application-transparent huge-page-aware mechanism to
place pages in a dual-technology hybrid memory system
while achieving both the cost advantages of two-tiered
memory and performance advantages of transparent huge
pages. We present an online page classification
mechanism that accurately classifies both 4KB and 2MB
pages as hot or cold while incurring no observable
performance overhead across several representative
cloud applications. We implement Thermostat in Linux
kernel version 4.5 and evaluate its effectiveness on
representative cloud computing workloads running under
KVM virtualization. We emulate slow memory with
performance characteristics approximating near-future
high-density memory technology and show that Thermostat
migrates up to 50\% of application footprint to slow
memory while limiting performance degradation to 3\%,
thereby reducing memory cost up to 30\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
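The hot/cold decision Thermostat's abstract describes reduces to sampling per-page access counts over an interval and applying a threshold, with cold pages becoming candidates for the slow tier. A stripped-down sketch; the counts and threshold are invented:

def classify(access_counts, hot_threshold=8):
    # access_counts: page -> sampled accesses in the last interval
    hot, cold = set(), set()
    for page, count in access_counts.items():
        (hot if count >= hot_threshold else cold).add(page)
    return hot, cold

hot, cold = classify({0x1000: 120, 0x2000: 2, 0x3000: 0})
print(sorted(map(hex, cold)))   # candidates for migration to slow memory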
@Article{Barbalace:2017:BBH,
author = "Antonio Barbalace and Robert Lyerly and Christopher
Jelesnianski and Anthony Carno and Ho-Ren Chuang and
Vincent Legout and Binoy Ravindran",
title = "Breaking the Boundaries in Heterogeneous-{ISA}
Datacenters",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "645--659",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037738",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy efficiency is one of the most important design
considerations in running modern datacenters.
Datacenter operating systems rely on software
techniques such as execution migration to achieve
energy efficiency across pools of machines. Execution
migration is possible in datacenters today because they
consist mainly of homogeneous-ISA machines. However,
recent market trends indicate that alternate ISAs such
as ARM and PowerPC are pushing into the datacenter,
meaning current execution migration techniques are no
longer applicable. How can execution migration be
applied in future heterogeneous-ISA datacenters? In
this work we present a compiler, a runtime, and an
operating system extension for enabling execution
migration between heterogeneous-ISA servers. We present
a new multi-ISA binary architecture and
heterogeneous-OS containers for facilitating efficient
migration of natively-compiled applications. We build
and evaluate a prototype of our design and demonstrate
energy savings of up to 66\% for a workload running on
an ARM and an x86 server interconnected by a high-speed
network.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Lustig:2017:ASC,
author = "Daniel Lustig and Andrew Wright and Alexandros
Papakonstantinou and Olivier Giroux",
title = "Automated Synthesis of Comprehensive Memory Model
Litmus Test Suites",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "661--675",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037723",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The memory consistency model is a fundamental part of
any shared memory architecture or programming model.
Modern weak memory models are notoriously difficult to
define and to implement correctly. Most real-world
programming languages, compilers, and
(micro)architectures therefore rely heavily on
black-box testing methodologies. The success of such
techniques requires that the suite of litmus tests used
to perform the testing be comprehensive---it should
ideally stress all obscure corner cases of the model
and of its implementation. Most litmus test suites
today are generated from some combination of manual
effort and randomization; however, the complex and
subtle nature of contemporary memory models means that
manual effort is both error-prone and subject to
incomplete coverage. This paper presents a methodology
for synthesizing comprehensive litmus test suites
directly from a memory model specification. By
construction, these suites contain all tests satisfying
a minimality criterion: that no synchronization
mechanism in the test can be weakened without causing
new behaviors to become observable. We formalize this
notion using the Alloy modeling language, and we apply
it to a number of existing and newly-proposed memory
models. Our results show not only that this synthesis
technique can automatically reproduce all
manually-generated tests from existing suites, but also
that it discovers new tests that are not as well
studied.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
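A litmus test, as used in the abstract above, is a tiny concurrent program plus a question about which final states are observable. The classic message-passing test, checked here by brute-force interleaving under sequential consistency; weaker models may additionally allow the outcome that is unreachable here, which is why fence strength in such tests matters:

from itertools import permutations

T0 = [('st', 'x', 1), ('st', 'y', 1)]        # producer: data, then flag
T1 = [('ld', 'y', 'r1'), ('ld', 'x', 'r2')]  # consumer: flag, then data

def run(order):
    mem, regs = {'x': 0, 'y': 0}, {}
    for op, loc, arg in order:
        if op == 'st':
            mem[loc] = arg
        else:
            regs[arg] = mem[loc]
    return regs['r1'], regs['r2']

outcomes = set()
for order in permutations(T0 + T1):
    # sequential consistency: keep each thread's own program order
    if [e for e in order if e in T0] == T0 and \
       [e for e in order if e in T1] == T1:
        outcomes.add(run(order))
print((1, 0) in outcomes)   # False: SC forbids flag-set-but-stale-data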
@Article{Liu:2017:DAD,
author = "Haopeng Liu and Guangpu Li and Jeffrey F. Lukman and
Jiaxin Li and Shan Lu and Haryadi S. Gunawi and Chen
Tian",
title = "{DCatch}: Automatically Detecting Distributed
Concurrency Bugs in Cloud Systems",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "677--691",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037735",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In big data and cloud computing era, reliability of
distributed systems is extremely important.
Unfortunately, distributed concurrency bugs, referred
to as DCbugs, widely exist. They hide in the large
state space of distributed cloud systems and manifest
non-deterministically depending on the timing of
distributed computation and communication. Effective
techniques to detect DCbugs are desired. This paper
presents a pilot solution, DCatch, in the world of
DCbug detection. DCatch predicts DCbugs by analyzing
correct execution of distributed systems. To build
DCatch, we design a set of happens-before rules that
model a wide variety of communication and concurrency
mechanisms in real-world distributed cloud systems. We
then build runtime tracing and trace analysis tools to
effectively identify concurrent conflicting memory
accesses in these systems. Finally, we design tools to
help prune false positives and trigger DCbugs. We have
evaluated DCatch on four representative open-source
distributed cloud systems, Cassandra, Hadoop MapReduce,
HBase, and ZooKeeper. By monitoring correct execution
of seven workloads on these systems, DCatch reports 32
DCbugs, with 20 of them being truly harmful.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
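Happens-before analysis of the kind DCatch performs can be modeled with vector clocks ordered by message send/receive: two accesses to the same resource are a candidate DCbug if neither access's clock dominates the other. A small sketch; the event format and single-rule model are simplifications of the paper's rule set:

def leq(a, b):
    return all(x <= y for x, y in zip(a, b))

def find_races(events, n_nodes):
    clocks = [[0] * n_nodes for _ in range(n_nodes)]
    in_flight, accesses, races = {}, [], []
    for ev in events:
        kind, node = ev[0], ev[1]
        clocks[node][node] += 1
        if kind == 'send':
            in_flight[ev[2]] = list(clocks[node])
        elif kind == 'recv':     # merge the sender's clock
            clocks[node] = [max(x, y) for x, y in
                            zip(clocks[node], in_flight[ev[2]])]
        elif kind == 'access':
            for other, key, c in accesses:
                if key == ev[2] and not leq(c, clocks[node]) \
                        and not leq(clocks[node], c):
                    races.append((key, other, node))
            accesses.append((node, ev[2], list(clocks[node])))
    return races

print(find_races([('access', 0, 'meta'), ('send', 0, 'm1'),
                  ('recv', 1, 'm1'), ('access', 1, 'meta'),
                  ('access', 2, 'meta')], 3))   # node 2 races with 0 and 1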
@Article{Mashtizadeh:2017:TPD,
author = "Ali Jos{\'e} Mashtizadeh and Tal Garfinkel and David
Terei and David Mazieres and Mendel Rosenblum",
title = "Towards Practical Default-On Multi-Core Record\slash
Replay",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "693--708",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037751",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Castor, a record/replay system for
multi-core applications that provides consistently low
and predictable overheads. With Castor, developers can
leave record and replay on by default, making it
practical to record and reproduce production bugs, or
employ fault tolerance to recover from hardware
failures. Castor is inspired by several observations:
First, an efficient mechanism for logging
non-deterministic events is critical for recording
demanding workloads with low overhead. Through careful
use of hardware we were able to increase log throughput
by 10x or more, e.g., we could record a server handling
10x more requests per second for the same record
overhead. Second, most applications can be recorded
without modifying source code by using the compiler to
instrument language level sources of non-determinism,
in conjunction with more familiar techniques like
shared library interposition. Third, while Castor
cannot deterministically replay all data races, this
limitation is generally unimportant in practice,
contrary to what prior work has assumed. Castor
currently supports applications written in C, C++, and
Go on FreeBSD. We have evaluated Castor on parallel and
server workloads, including a commercial implementation
of memcached in Go, which runs Castor in production.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Huang:2017:PSA,
author = "Jian Huang and Michael Allen-Bond and Xuechen Zhang",
title = "{Pallas}: Semantic-Aware Checking for Finding Deep
Bugs in Fast Path",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "709--722",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037743",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software optimization is constantly a serious concern
for developing high-performance systems. To accelerate
the workflow execution of a specific functionality,
software developers usually define and implement a fast
path to speed up the critical and commonly executed
functions in the workflow. However, producing a
bug-free fast path is nontrivial. Our study on the
Linux kernel discloses that a committed fast path can
have up to 19 follow-up patches for bug fixing, and
most of them are deep semantic bugs, which are
difficult to pinpoint with existing bug-finding
tools. In this paper, we present this new category of
software bugs based on our fast-path bug study across
various system software including virtual memory
manager, file systems, network, and device drivers. We
investigate their root causes and identify five
error-prone aspects in a fast path: path state, trigger
condition, path output, fault handling, and assistant
data structure. We find that many of the deep bugs can
be prevented by applying static analysis incorporating
simple semantic information. We extract a set of rules
based on our findings and build a toolkit, PALLAS, to
check for fast-path bugs. The evaluation results show that
PALLAS can effectively reveal fast-path bugs in a
variety of systems including Linux kernel, mobile
operating system, software-defined networking system,
and web browser.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Kotra:2017:HSC,
author = "Jagadish B. Kotra and Narges Shahidi and Zeshan A.
Chishti and Mahmut T. Kandemir",
title = "Hardware-Software Co-design to Mitigate {DRAM} Refresh
Overheads: a Case for Refresh-Aware Process
Scheduling",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "723--736",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037724",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "DRAM cells need periodic refresh to maintain data
integrity. With high capacity DRAMs, DRAM refresh poses
a significant performance bottleneck as the number of
rows to be refreshed (and hence the refresh cycle time,
tRFC) with each refresh command increases. Modern day
DRAMs perform refresh at a rank-level, while LPDDRs
used in mobile environments support refresh at a
per-bank level. Rank-level refresh degrades the
performance significantly since none of the banks in a
rank can serve the on-demand requests. Per-bank refresh
alleviates some of the performance bottlenecks as the
other banks in a rank are available for on-demand
requests. Typical DRAM retention time is on the order
of tens of milliseconds, viz., 64 msec for environments
operating at temperatures below 85 deg C and 32 msec
for environments operating above 85 deg C. With systems
moving towards increased consolidation (e.g., virtualized
environments), DRAM refresh becomes a significant
bottleneck as it reduces the available overall DRAM
bandwidth per task. In this work, we propose a
hardware-software co-design to mitigate DRAM refresh
overheads by exposing the hardware address mapping and
DRAM refresh schedule to the Operating System. We
propose a novel DRAM refresh-aware process scheduling
algorithm in OS which schedules applications on cores
such that none of the on-demand requests from the
application are stalled by refreshes. Extensive
evaluation of our proposed co-design on
multi-programmed SPEC CPU2006 workloads shows
significant performance improvement compared to the
previously proposed hardware only approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Kim:2017:KPC,
author = "Jinchun Kim and Elvira Teran and Paul V. Gratz and
Daniel A. Jim{\'e}nez and Seth H. Pugsley and Chris
Wilkerson",
title = "Kill the Program Counter: Reconstructing Program
Behavior in the Processor Cache Hierarchy",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "737--749",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037701",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data prefetching and cache replacement algorithms have
been intensively studied in the design of high
performance microprocessors. Typically, the data
prefetcher operates in the private caches and does not
interact with the replacement policy in the shared
Last-Level Cache (LLC). Similarly, most replacement
policies do not consider demand and prefetch requests
as different types of requests. In particular, program
counter (PC)-based replacement policies cannot learn
from prefetch requests since the data prefetcher does
not generate a PC value. PC-based policies can also be
negatively affected by compiler optimizations. In this
paper, we propose a holistic cache management technique
called Kill-the-PC (KPC) that overcomes the weaknesses
of traditional prefetching and replacement policy
algorithms. KPC cache management has three novel
contributions. First, a prefetcher which approximates
the future use distance of prefetch requests based on
its prediction confidence. Second, a simple replacement
policy provides similar or better performance than
current state-of-the-art PC-based prediction using
global hysteresis. Third, KPC integrates prefetching
and replacement policy into a whole system which is
greater than the sum of its parts. Information from the
prefetcher is used to improve the performance of the
replacement policy and vice-versa. Finally, KPC removes
the need to propagate the PC through the entire on-chip
cache hierarchy while providing a holistic cache
management approach with better performance than
state-of-the-art PC-, and non-PC-based schemes. Our
evaluation shows that KPC provides 8\% better
performance than the best combination of existing
prefetcher and replacement policy for multi-core
workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Gao:2017:TSE,
author = "Mingyu Gao and Jing Pu and Xuan Yang and Mark Horowitz
and Christos Kozyrakis",
title = "{TETRIS}: Scalable and Efficient Neural Network
Acceleration with {$3$D} Memory",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "751--764",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037702",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The high accuracy of deep neural networks (NNs) has
led to the development of NN accelerators that improve
performance by two orders of magnitude. However,
scaling these accelerators for higher performance with
increasingly larger NNs exacerbates the cost and energy
overheads of their memory systems, including the
on-chip SRAM buffers and the off-chip DRAM channels.
This paper presents the hardware architecture and
software scheduling and partitioning techniques for
TETRIS, a scalable NN accelerator using 3D memory.
First, we show that the high throughput and low energy
characteristics of 3D memory allow us to rebalance the
NN accelerator design, using more area for processing
elements and less area for SRAM buffers. Second, we
move portions of the NN computations close to the DRAM
banks to decrease bandwidth pressure and increase
performance and energy efficiency. Third, we show that
despite the use of small SRAM buffers, the presence of
3D memory simplifies dataflow scheduling for NN
computations. We present an analytical scheduling
scheme that matches the efficiency of schedules derived
through exhaustive search. Finally, we develop a hybrid
partitioning scheme that parallelizes the NN
computations over multiple accelerators. Overall, we
show that TETRIS improves the performance by 4.1x and
reduces the energy by 1.5x over NN accelerators with
conventional, low-power DRAM memory systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Song:2017:HBA,
author = "Wonjun Song and Gwangsun Kim and Hyungjoon Jung and
Jongwook Chung and Jung Ho Ahn and Jae W. Lee and John
Kim",
title = "History-Based Arbitration for Fairness in
Processor-Interconnect of {NUMA} Servers",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "765--777",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037753",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "NUMA (non-uniform memory access) servers are commonly
used in high-performance computing and datacenters.
Within each server, a processor-interconnect (e.g.,
Intel QPI, AMD HyperTransport) is used to communicate
between the different sockets or nodes. In this work,
we explore the impact of the processor-interconnect on
overall performance --- in particular, the performance
unfairness caused by processor-interconnect
arbitration. It is well known that locally-fair
arbitration does not guarantee globally-fair bandwidth
sharing as closer nodes receive more bandwidth in a
multi-hop network. However, this work demonstrates that
the opposite can occur in a commodity NUMA server where
remote nodes receive higher bandwidth (and perform
better). We analyze this problem and identify that this
occurs because of external concentration used in router
micro-architectures for processor-interconnects without
globally-aware arbitration. While accessing remote
memory can occur in any NUMA system, performance
unfairness (or performance variation) is more critical
in cloud computing and virtual machines with shared
resources. We demonstrate how this unfairness creates
significant performance variation when a workload is
executed on the Xen virtualization platform. We then
provide analysis using synthetic workloads to better
understand the source of unfairness and eliminate the
impact of other shared resources, including the shared
last-level cache and main memory. To provide fairness,
we propose a novel, history-based arbitration that
tracks the history of arbitration grants made in the
previous history window. A weighted arbitration is done
based on the history to provide global fairness.
Through simulations, we show our proposed history-based
arbitration can provide global fairness and minimize
the processor-interconnect performance unfairness at
low cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
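The history-based idea the abstract proposes can be caricatured in a few lines: instead of locally fair round-robin, grant the requester that has received the fewest grants within a sliding window. A sketch; the window size and tie-breaking rule are assumptions:

from collections import deque

class HistoryArbiter:
    def __init__(self, window=64):
        self.history = deque(maxlen=window)      # recent grant decisions

    def grant(self, requesters):
        counts = {r: list(self.history).count(r) for r in requesters}
        winner = min(requesters, key=counts.get) # least-served wins
        self.history.append(winner)
        return winner

arb = HistoryArbiter()
print([arb.grant([0, 1, 2]) for _ in range(6)])  # [0, 1, 2, 0, 1, 2]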
@Article{Misra:2017:ELT,
author = "Pulkit A. Misra and Jeffrey S. Chase and Johannes
Gehrke and Alvin R. Lebeck",
title = "Enabling Lightweight Transactions with Precision
Time",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "779--794",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037722",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Distributed transactional storage is an important
service in today's data centers. Achieving high
performance without high complexity is often a
challenge for these systems due to sophisticated
consistency protocols and multiple layers of
abstraction. In this paper we show how to combine two
emerging technologies---Software-Defined Flash (SDF)
and precise synchronized clocks---to improve
performance and reduce complexity for transactional
storage within the data center. We present a
distributed transactional system (called MILANA) as a
layer above a durable multi-version key-value store
(called SEMEL) for read-heavy workloads within a data
center. SEMEL exploits write behavior of SSDs to
maintain a time-ordered sequence of versions for each
key efficiently and durably. MILANA adds a variant of
optimistic concurrency control above SEMEL's API to
service read requests from a consistent snapshot and to
enable clients to make fast local commit or abort
decisions for read-only transactions. Experiments with
the prototype reveal up to 43\% lower transaction abort
rates using IEEE Precision Time Protocol (PTP) vs. the
standard Network Time Protocol (NTP). Under the Retwis
benchmark, client-local validation of read-only
transactions yields a 35\% reduction in latency and
55\% increase in transaction throughput.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
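The client-local commit rule for read-only transactions that the abstract alludes to can be sketched over a multi-version store: a transaction reading at snapshot time T commits locally if every version it read is still the latest at or before T. Tighter clock synchronization (PTP rather than NTP) shrinks the uncertainty around T and hence the abort rate. A toy sketch; the data layout is invented:

versions = {                  # key -> sorted (commit_ts, value) versions
    'a': [(10, 'a0'), (42, 'a1')],
    'b': [(7, 'b0')],
}

def read_at(key, ts):         # latest version at or before ts
    return max(v for v in versions[key] if v[0] <= ts)

def commit_read_only(read_set, snapshot_ts):
    # read_set: (key, commit_ts of the version the client actually read)
    for key, seen_ts in read_set:
        if read_at(key, snapshot_ts)[0] != seen_ts:
            return False      # a newer version slipped in: abort locally
    return True               # commit with no server round-trip

print(commit_read_only([('a', 42), ('b', 7)], snapshot_ts=50))   # True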
@Article{Liu:2017:ITN,
author = "Ming Liu and Liang Luo and Jacob Nelson and Luis Ceze
and Arvind Krishnamurthy and Kishore Atreya",
title = "{IncBricks}: Toward In-Network Computation with an
In-Network Cache",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "795--809",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037731",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The emergence of programmable network devices and the
increasing data traffic of datacenters motivate the
idea of in-network computation. By offloading compute
operations onto intermediate networking devices (e.g.,
switches, network accelerators, middleboxes), one can
(1) serve network requests on the fly with low latency;
(2) reduce datacenter traffic and mitigate network
congestion; and (3) save energy by running servers in a
low-power mode. However, since (1) existing switch
technology doesn't provide general computing
capabilities, and (2) commodity datacenter networks are
complex (e.g., hierarchical fat-tree topologies,
multipath communication), enabling in-network
computation inside a datacenter is challenging. In this
paper, as a step towards in-network computing, we
present IncBricks, an in-network caching fabric with
basic computing primitives. IncBricks is a
hardware-software co-designed system that supports
caching in the network using a programmable network
middlebox. As a key-value store accelerator, our
prototype lowers request latency by over 30\% and
doubles throughput for 1024-byte values in a common
cluster configuration. Our results demonstrate the
effectiveness of in-network computing and that
efficient datacenter network request processing is
possible if we carefully split the computation across
the different programmable computing elements in a
datacenter, including programmable switches, network
accelerators, and end hosts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Akturk:2017:AAA,
author = "Ismail Akturk and Ulya R. Karpuzcu",
title = "{AMNESIAC}: Amnesic Automatic Computer",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "811--824",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037741",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Due to imbalances in technology scaling, the energy
consumption of data storage and communication by far
exceeds the energy consumption of actual data
production, i.e., computation. As a consequence,
recomputing data can become more energy efficient than
storing and retrieving precomputed data. At the same
time, recomputation can relax the pressure on the
memory hierarchy and the communication bandwidth. This
study hence assesses the energy efficiency prospects of
trading computation for communication. We introduce an
illustrative proof-of-concept design, identify
practical limitations, and provide design guidelines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Bai:2017:VRE,
author = "Yuxin Bai and Victor W. Lee and Engin Ipek",
title = "Voltage Regulator Efficiency Aware Power Management",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "825--838",
month = apr,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093336.3037717",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:16 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Conventional off-chip voltage regulators are typically
bulky and slow, and are inefficient at exploiting
system and workload variability using Dynamic Voltage
and Frequency Scaling (DVFS). On-die integration of
voltage regulators has the potential to increase the
energy efficiency of computer systems by enabling power
control at a fine granularity in both space and time.
The energy conversion efficiency of on-chip regulators,
however, is typically much lower than that of off-chip
regulators, which results in significant energy losses.
Fine-grained power control and high voltage regulator
efficiency are difficult to achieve simultaneously,
with either emerging on-chip or conventional off-chip
regulators. A voltage conversion framework that relies
on a hierarchy of off-chip switching regulators and
on-chip linear regulators is proposed to enable
fine-grained power control with a regulator efficiency
greater than 90\%. A DVFS control policy that is based
on a reinforcement learning (RL) approach is developed
to exploit the proposed framework. Per-core RL agents
learn and improve their control policies independently,
while retaining the ability to coordinate their actions
to accomplish system level power management objectives.
When evaluated on a mix of 14 parallel and 13
multiprogrammed workloads, the proposed voltage
conversion framework achieves 18\% greater energy
efficiency than a conventional framework that uses
on-chip switching regulators. Moreover, when the RL
based DVFS control policy is used to control the
proposed voltage conversion framework, the system
achieves 21\% higher energy efficiency than a
baseline oracle policy with coarse-grained power
control capability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '17 conference proceedings.",
}
@Article{Heinze:2017:TSA,
author = "Thomas S. Heinze and Anders M{\o}ller and Fabio
Strocco",
title = "Type safety analysis for {Dart}",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "1--12",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989226",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Optional typing is traditionally viewed as a
compromise between static and dynamic type checking,
where code without type annotations is not checked
until runtime. We demonstrate that optional type
annotations in Dart programs can be integrated into a
flow analysis to provide static type safety guarantees
both for annotated and non-annotated parts of the code.
We explore two approaches: one that uses type
annotations for filtering, and one that uses them as
specifications. What makes this particularly
challenging for Dart is that its type system is unsound
even for fully annotated code. Experimental results
show that the technique is remarkably effective, even
without context sensitivity: 99.3\% of all property
lookup operations are reported type safe in a
collection of benchmark programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Mezzetti:2017:TUP,
author = "Gianluca Mezzetti and Anders M{\o}ller and Fabio
Strocco",
title = "Type unsoundness in practice: an empirical study of
{Dart}",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "13--24",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989227",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The type system in the Dart programming language is
deliberately designed to be unsound: for a number of
reasons, it may happen that a program encounters type
errors at runtime although the static type checker
reports no warnings. According to the language
designers, this ensures a pragmatic balance between the
ability to catch bugs statically and allowing a
flexible programming style without burdening the
programmer with a lot of spurious type warnings. In
this work, we attempt to experimentally validate these
design choices. Through an empirical evaluation based
on open source programs written in Dart totaling 2.4 M
LOC, we explore how alternative, more sound choices
affect the type warnings being produced. Our results
show that some, but not all, sources of unsoundness can
be justified. In particular, we find that unsoundness
caused by bivariant function subtyping and method
overriding does not seem to help programmers. Such
information may be useful when designing future
versions of the language or entirely new languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Park:2017:PSS,
author = "Changhee Park and Hyeonseung Im and Sukyoung Ryu",
title = "Precise and scalable static analysis of {jQuery} using
a regular expression domain",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "25--36",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989228",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "jQuery is the most popular JavaScript library but the
state-of-the-art static analyzers for JavaScript
applications fail to analyze simple programs that use
jQuery. In this paper, we present a novel abstract
string domain whose elements are simple regular
expressions that can represent prefix, infix, and
postfix substrings of a string and even their sets. We
formalize the new domain in the abstract interpretation
framework with abstract models of strings and objects
commonly used in the existing JavaScript analyzers. For
practical use of the domain, we present polynomial-time
inclusion decision rules between the regular
expressions and prove that the rules exactly capture
the actual inclusion relation. We have implemented the
domain as an extension of the open-source JavaScript
analyzer, SAFE, and we show that the extension
significantly improves the scalability and precision of
the baseline analyzer in analyzing programs that use
jQuery.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{DeWael:2017:JTI,
author = "Mattias {De Wael} and Janwillem Swalens and Wolfgang
{De Meuter}",
title = "Just-in-time inheritance: a dynamic and implicit
multiple inheritance mechanism",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "37--47",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989229",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multiple inheritance is often criticised for the
ambiguity that arises when multiple parents want to
pass on a feature with the same name to their
offspring. A survey of programming languages reveals
that no programming language has an inherently implicit
and dynamic approach to resolve this ambiguity. This
paper identifies just-in-time inheritance as the first
implicit and dynamic inheritance mechanism. The key
idea of just-in-time inheritance is that one of the
parents is favoured over the others, which resolves the
ambiguity, and that the favoured parent can change at
runtime. However, just-in-time inheritance is not the
silver bullet to solve all ambiguity problems heir to
multiple inheritance, because it is not applicable in
all scenarios. We conclude that the applicability of
just-in-time inheritance is to be found in systems
where multiple inheritance is used to model an ``is-a
OR is-a''-relation, rather than the more traditional
``is-a AND is-a''-relation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Meier:2017:PVM,
author = "Remigius Meier and Armin Rigo and Thomas R. Gross",
title = "Parallel virtual machines with {RPython}",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "48--59",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989233",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The RPython framework takes an interpreter for a
dynamic language as its input and produces a Virtual
Machine (VM) for that language. RPython is being
used to develop PyPy, a high-performance Python
interpreter. However, the produced VM does not support
parallel execution since the framework relies on a
Global Interpreter Lock (GIL): PyPy serialises the
execution of multi-threaded Python programs. We
describe the rationale and design of a new parallel
execution model for RPython that allows the generation
of parallel virtual machines while leaving the language
semantics unchanged. This model then allows different
implementations of concurrency control, and we discuss
an implementation based on a GIL and an implementation
based on Software Transactional Memory (STM). To
evaluate the benefits of either choice, we adapt PyPy
to work with both implementations (GIL and STM). The
evaluation shows that PyPy with STM improves the
runtime of a set of multi-threaded Python programs over
PyPy with a GIL by factors in the range of 1.87 $
\times $ up to 5.96 $ \times $ when executing on a
processor with 8 cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Chari:2017:BEH,
author = "Guido Chari and Diego Garbervetsky and Stefan Marr",
title = "Building efficient and highly run-time adaptable
virtual machines",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "60--71",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989234",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming language virtual machines (VMs) realize
language semantics, enforce security properties, and
execute applications efficiently. Fully Reflective
Execution Environments (EEs) are VMs that additionally
expose their whole structure and behavior to
applications. This enables developers to observe and
adapt VMs at run time. However, there is a belief that
reflective EEs are not viable for practical usages
because such flexibility would incur a high performance
overhead. To refute this belief, we built a reflective
EE on top of a highly optimizing dynamic compiler. We
introduced a new optimization model that, based on the
conjecture that variability of low-level (EE-level)
reflective behavior is low in many scenarios, mitigates
the most significant sources of the performance
overheads related to the reflective capabilities in the
EE. Our experiments indicate that reflective EEs can
reach peak performance on the order of standard VMs.
Concretely: (a) if reflective mechanisms are not used,
the execution overhead is negligible compared to
standard VMs; (b) VM operations can be redefined at the
language level without incurring significant
overheads; and (c) for several software adaptation
tasks, applying reflection at the VM level is not only
lightweight in terms of engineering effort, but also
competitive in terms of performance in comparison to
other ad-hoc solutions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Foley-Bourgon:2017:EIC,
author = "Vincent Foley-Bourgon and Laurie Hendren",
title = "Efficiently implementing the copy semantics of
{MATLAB}'s arrays in {JavaScript}",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "72--83",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989235",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/matlab.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compiling MATLAB---a dynamic, array-based
language---to JavaScript is an attractive proposal: the
output code can be deployed on a platform used by
billions and can leverage the countless hours that have
gone into making JavaScript JIT engines fast. But
before that can happen, the original MATLAB code must
be properly translated, making sure to bridge the
semantic gaps of the two languages. An important area
where MATLAB and JavaScript differ is in their handling
of arrays: for example, in MATLAB, arrays are
one-indexed and writing at an index beyond the end of
an array extends it; in JavaScript, typed arrays are
zero-indexed and writing out of bounds is a no-op. A
MATLAB-to-JavaScript compiler must address these
mismatches. Another salient and pervasive difference
between the two languages is the assignment of arrays
to variables: in MATLAB, this operation has value
semantics, while in JavaScript it has reference
semantics. In this paper, we present MatJuice --- a
source-to-source, ahead-of-time compiler back-end for
MATLAB --- and show how it deals efficiently with this last
issue. We present an intra-procedural data-flow
analysis to track where each array variable may point
to and which variables are possibly aliased. We also
present the associated copy insertion transformation
that uses the points-to information to insert explicit
copies when necessary. The resulting JavaScript program
respects the MATLAB value semantics and we show that it
performs fewer run-time copies than some alternative
approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Stadler:2017:ORL,
author = "Lukas Stadler and Adam Welc and Christian Humer and
Mick Jordan",
title = "Optimizing {R} language execution via aggressive
speculation",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "84--95",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989236",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The R language, from the point of view of language
design and implementation, is a unique combination of
various programming language concepts. It has
functional characteristics like lazy evaluation of
arguments, but also allows expressions to have
arbitrary side effects. Many runtime data structures,
for example variable scopes and functions, are
accessible and can be modified while a program
executes. Several different object models allow for
structured programming, but the object models can
interact in surprising ways with each other and with
the base operations of R. R works well in practice, but
it is complex, and it is a challenge for language
developers trying to improve on the current
state-of-the-art, which is the reference implementation
--- GNU R. The goal of this work is to demonstrate
that, given the right approach and the right set of
tools, it is possible to create an implementation of
the R language that provides significantly better
performance while keeping compatibility with the
original implementation. In this paper we describe
novel optimizations backed up by aggressive speculation
techniques and implemented within FastR, an alternative
R language implementation, utilizing Truffle --- a
JVM-based language development framework developed at
Oracle Labs. We also provide experimental evidence
demonstrating the effectiveness of these optimizations in
comparison with GNU R, as well as Renjin and TERR
implementations of the R language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Hemann:2017:SEL,
author = "Jason Hemann and Daniel P. Friedman and William E.
Byrd and Matthew Might",
title = "A small embedding of logic programming with a simple
complete search",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "96--107",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989230",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a straightforward, call-by-value embedding
of a small logic programming language with a simple
complete search. We construct the entire language in 54
lines of Racket --- half of which implement
unification. We then layer over it, in 43 lines, a
reconstruction of an existing logic programming
language, miniKanren, and attest to our
implementation's pedagogical value. Evidence suggests
our combination of expressiveness, concision, and
elegance is compelling: since microKanren's release, it
has spawned over 50 embeddings in over two dozen host
languages, including Go, Haskell, Prolog and
Smalltalk.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Warth:2017:MSA,
author = "Alessandro Warth and Patrick Dubroy and Tony
Garnock-Jones",
title = "Modular semantic actions",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "108--119",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989231",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parser generators give programmers a convenient and
declarative way to write parsers and other
language-processing applications, but their mechanisms
for extension and code reuse often leave something to
be desired. We introduce Ohm, a parser generator in
which both grammars and their interpretations can be
extended in safe and modular ways. Unlike many similar
tools, Ohm completely separates grammars and semantic
actions, avoiding the problems that arise when these
two concerns are mixed. This paper describes the
particular way in which Ohm achieves this separation,
and discusses the resulting benefits to modularity and
extensibility.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Marr:2017:CLC,
author = "Stefan Marr and Benoit Daloze and Hanspeter
M{\"o}ssenb{\"o}ck",
title = "Cross-language compiler benchmarking: are we fast
yet?",
journal = j-SIGPLAN,
volume = "52",
number = "2",
pages = "120--131",
month = feb,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093334.2989232",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Comparing the performance of programming languages is
difficult because they differ in many aspects including
preferred programming abstractions, available
frameworks, and their runtime systems. Nonetheless, the
question about relative performance comes up repeatedly
in the research community, industry, and wider audience
of enthusiasts. This paper presents 14 benchmarks and a
novel methodology to assess the compiler effectiveness
across language implementations. Using a set of common
language abstractions, the benchmarks are implemented
in Java, JavaScript, Ruby, Crystal, Newspeak, and
Smalltalk. We show that the benchmarks exhibit a wide
range of characteristics using language-agnostic
metrics. Using four different languages on top of the
same compiler, we show that the benchmarks perform
similarly and therefore allow for a comparison of
compiler effectiveness across languages. Based on
anecdotes, we argue that these benchmarks help language
implementers to identify performance bugs and
optimization potential by comparing to other language
implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '16 conference proceedings.",
}
@Article{Rompf:2017:LMS,
author = "Tiark Rompf",
title = "Lightweight modular staging {(LMS)}: generate all the
things! (keynote)",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "1--1",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993237",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent years have seen a surge of interest in staging
and generative programming, driven by the increasing
difficulty of making high-level code run fast on modern
hardware. While the mechanics of program generation are
relatively well understood, we have only begun to
understand how to develop systems in a generative way.
The Lightweight Modular Staging (LMS) platform forms
the core of a research agenda to make generative
programming more widely accessible, through powerful
libraries and a growing selection of case studies that
illuminate design patterns and crystallize best
practices for high-level and effective generative
programming. This talk will reflect on the foundations
of LMS, on applications, achievements, challenges, as
well as ongoing and future work.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Biboudis:2017:RJD,
author = "Aggelos Biboudis and Pablo Inostroza and Tijs van der
Storm",
title = "{Recaf}: {Java} dialects as libraries",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "2--13",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993239",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mainstream programming languages like Java have
limited support for language extensibility. Without
mechanisms for syntactic abstraction, new programming
styles can only be embedded in the form of libraries,
limiting expressiveness. In this paper, we present
Recaf, a lightweight tool for creating Java dialects;
effectively extending Java with new language constructs
and user-defined semantics. The Recaf compiler
generically transforms designated method bodies to code
that is parameterized by a semantic factory (Object
Algebra), defined in plain Java. The implementation of
such a factory defines the desired runtime semantics.
We applied our design to produce several examples from
a diverse set of programming styles and two case
studies: we define (i) extensions for generators,
asynchronous computations and asynchronous streams and
(ii) a Domain-Specific Language (DSL) for Parsing
Expression Grammars (PEGs), in a few lines of code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Wang:2017:CJ,
author = "Yanlin Wang and Haoyuan Zhang and Bruno C. d. S.
Oliveira and Marco Servetto",
title = "Classless {Java}",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "14--24",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993238",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents an OO style without classes, which
we call interface-based object-oriented programming
(IB). IB is a natural extension of closely related
ideas such as traits. Abstract state operations provide
a new way to deal with state, which allows for
flexibility not available in class-based languages. In
IB state can be type-refined in subtypes. The
combination of a purely IB style and type-refinement
enables powerful idioms using multiple inheritance and
state. To introduce IB to programmers we created
Classless Java: an embedding of IB directly into Java.
Classless Java uses annotation processing for code
generation and relies on new features of Java 8 for
interfaces. The code generation techniques used in
Classless Java have interesting properties, including
guarantees that the generated code is type-safe and
good integration with IDEs. The usefulness of IB and
Classless Java is shown with examples and case
studies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Zacharopoulos:2017:EMM,
author = "Theologos Zacharopoulos and Pablo Inostroza and Tijs
van der Storm",
title = "Extensible modeling with managed data in {Java}",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "25--35",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993240",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many model-driven development (MDD) tools employ
specialized frameworks and modeling languages, and
assume that the semantics of models is provided by some
form of code generation. As a result, programming
against models is cumbersome and does not integrate
well with ordinary programming languages and IDEs. In
this paper we present MD4J, a modeling approach for
embedding metamodels directly in Java, using plain
interfaces and annotations. The semantics is provided
by data managers that create and manipulate models.
This architecture enables two kinds of extensibility.
First, the data managers can be changed or extended to
obtain different base semantics of a model. This allows
a kind of aspect-oriented programming. Second, the
metamodels themselves can be extended with additional
fields and methods to modularly enrich a modeling
language. We illustrate our approach using the example
of state machines, discuss the implementation, and
evaluate it with two case studies: the execution of UML
activity diagrams and an aspect-oriented refactoring of
JHotDraw.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Rosa:2017:APV,
author = "Andrea Ros{\`a} and Lydia Y. Chen and Walter Binder",
title = "Actor profiling in virtual execution environments",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "36--46",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993241",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Nowadays, many virtual execution environments benefit
from concurrency offered by the actor model.
Unfortunately, while actors are used in many
applications, existing profiling tools are not very
effective at analyzing the performance of applications
using actors. In this paper, we present a new
instrumentation-based technique to profile actors in
virtual execution environments. Our technique adopts
platform-independent profiling metrics that minimize
the perturbations induced by the instrumentation logic
and allow comparing profiling results across different
platforms. In particular, our technique measures the
initialization cost, the amount of executed
computations, and the messages sent and received by
each actor. We implement our technique within a
profiling tool for Akka actors on the Java platform.
Evaluation results show that our profiling technique
helps performance analysis of actor utilization and
communication between actors in large-scale computing
frameworks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Konat:2017:BDS,
author = "Gabri{\"e}l Konat and Sebastian Erdweg and Eelco
Visser",
title = "Bootstrapping domain-specific meta-languages in
language workbenches",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "47--58",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993242",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is common practice to bootstrap compilers of
programming languages. By using the compiled language
to implement the compiler, compiler developers can code
in their own high-level language and gain a large-scale
test case. In this paper, we investigate bootstrapping
of compiler-compilers as they occur in language
workbenches. Language workbenches support the
development of compilers through the application of
multiple collaborating domain-specific meta-languages
for defining a language's syntax, analysis, code
generation, and editor support. We analyze the
bootstrapping problem of language workbenches in
detail, propose a method for sound bootstrapping based
on fixpoint compilation, and show how to conduct
breaking meta-language changes in a bootstrapped
language workbench. We have applied sound bootstrapping
to the Spoofax language workbench and report on our
experience.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Philips:2017:DDD,
author = "Laure Philips and Joeri {De Koster} and Wolfgang {De
Meuter} and Coen {De Roover}",
title = "Dependence-driven delimited {CPS} transformation for
{JavaScript}",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "59--69",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993243",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In today's web applications asynchronous requests to
remote services using callbacks or futures are
omnipresent. The continuation of such a non-blocking
task is represented as a callback function that will
later be called with the result of the request. This
style of programming where the remainder of a
computation is captured in a continuation function is
called continuation-passing style (CPS). This style of
programming can quickly lead to a phenomenon called
``callback hell'', which has a negative impact on the
maintainability of applications that employ this style.
Several alternatives to callbacks are therefore gaining
traction within the web domain. For example, there are
a number of frameworks that rely on automatically
transforming sequential style code into the
continuation-passing style. However, these frameworks
often employ a conservative approach in which each
function call is transformed into CPS. This
conservative approach can sequentialise requests that
could otherwise be run in parallel. So-called delimited
continuations can remedy this, but require special marks
that have to be manually inserted in the code for
marking the beginning and end of the continuation. In
this paper we propose an alternative strategy in which
we apply a delimited CPS transformation that operates
on a Program Dependence Graph instead, to find the
limits of each continuation. We implement this strategy
in JavaScript and demonstrate its applicability to
various web programming scenarios.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Lee:2017:SRE,
author = "Mina Lee and Sunbeom So and Hakjoo Oh",
title = "Synthesizing regular expressions from examples for
introductory automata assignments",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "70--80",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993244",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a method for synthesizing regular
expressions for introductory automata assignments.
Given a set of positive and negative examples, the
method automatically synthesizes the simplest possible
regular expression that accepts all the positive
examples while rejecting all the negative examples. The
key novelty is the search-based synthesis algorithm
that leverages ideas from over- and
under-approximations to effectively prune out a large
search space. We have implemented our technique in a
tool and evaluated it with non-trivial benchmark
problems that students often struggle with. The results
show that our system can synthesize desired regular
expressions in 6.7 seconds on average, so that it
can be interactively used by students to enhance their
understanding of regular expressions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Omar:2017:PSF,
author = "Cyrus Omar and Jonathan Aldrich",
title = "Programmable semantic fragments: the design and
implementation of {\tt typy}",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "81--92",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993245",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces typy, a statically typed
programming language embedded by reflection into
Python. typy features a fragmentary semantics, i.e. it
delegates semantic control over each term, drawn from
Python's fixed concrete and abstract syntax, to some
contextually relevant user-defined semantic fragment.
The delegated fragment programmatically (1) typechecks
the term (following a bidirectional protocol); and (2)
assigns dynamic meaning to the term by computing a
translation to Python. We argue that this design is
expressive with examples of fragments that express the
static and dynamic semantics of (1) functional records;
(2) labeled sums (with nested pattern matching {\`a} la
ML); (3) a variation on JavaScript's prototypal object
system; and (4) typed foreign interfaces to Python and
OpenCL. These semantic structures are, or would need to
be, defined primitively in conventionally structured
languages. We further argue that this design is
compositionally well-behaved. It avoids the expression
problem and the problems of grammar composition because
the syntax is fixed. Moreover, programs are
semantically stable under fragment composition (i.e.
defining a new fragment will not change the meaning of
existing program components).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Kienzle:2017:DDV,
author = "J{\"o}rg Kienzle and Gunter Mussbacher and Philippe
Collet and Omar Alam",
title = "Delaying decisions in variable concern hierarchies",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "93--103",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993246",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concern-Oriented Reuse (CORE) proposes a new way of
structuring model-driven software development, where
models of the system are modularized by domains of
abstraction within units of reuse called concerns.
Within a CORE concern, models are further decomposed
and modularized by features. This paper extends CORE
with a technique that enables developers of high-level
concerns to reuse lower-level concerns without
unnecessarily committing to a specific feature
selection. The developer can select the functionality
that is minimally needed to continue development, and
reexpose relevant alternative lower-level features of
the reused concern in the reusing concern's interface.
This effectively delays decision making about
alternative functionality until the higher-level reuse
context, where more detailed requirements are known and
further decisions can be made. The paper describes the
algorithms for composing the variation (i.e., feature
and impact models), customization, and usage interfaces
of a concern, as well as the concern's realization
models and finally an entire concern hierarchy, as is
necessary to support delayed decision making in CORE.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Adam:2017:ACG,
author = "Sorin Adam and Marco Kuhrmann and Ulrik Pagh Schultz",
title = "Automatic code generation in practice: experiences
with embedded robot controllers",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "104--108",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993247",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mobile robots often use a distributed architecture in
which software components are deployed to heterogeneous
hardware modules. Ensuring the consistency with the
designed architecture is a complex task, notably if
functional safety requirements have to be fulfilled. We
propose to use a domain-specific language to specify
those requirements and to allow for generating a
safety-enforcing layer of code, which is deployed to
the robot. The paper at hand reports experiences in
practically applying code generation to mobile robots.
For two cases, we discuss how we addressed challenges,
e.g., regarding weaving code generation into
proprietary development environments and testing of
manually written code. We find that a DSL based on the
same conceptual model can be used across different
kinds of hardware modules, but a significant adaptation
effort is required in practical scenarios involving
different kinds of hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Braz:2017:CCA,
author = "Larissa Braz and Rohit Gheyi and Melina Mongiovi and
M{\'a}rcio Ribeiro and Fl{\'a}vio Medeiros and Leopoldo
Teixeira",
title = "A change-centric approach to compile configurable
systems with {\tt \#ifdef}s",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "109--119",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993250",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Configurable systems typically use \#ifdefs to denote
variability. Generating and compiling all
configurations may be time-consuming. An alternative
consists of using variability-aware parsers, such as
TypeChef. However, they may not scale. In practice,
compiling the complete systems may be costly.
Therefore, developers can use sampling strategies to
compile only a subset of the configurations. We propose
a change-centric approach to compile configurable
systems with \#ifdefs by analyzing only configurations
impacted by a code change (transformation). We
implement it in a tool called CHECKCONFIGMX, which
reports the new compilation errors introduced by the
transformation. We perform an empirical study to
evaluate 3,913 transformations applied to the 14
largest files of BusyBox, Apache HTTPD, and Expat
configurable systems. CHECKCONFIGMX finds 595
compilation errors of 20 types introduced by 41
developers in 214 commits (5.46\% of the analyzed
transformations). In our study, it reduces by at least
50\% (an average of 99\%) the effort of evaluating the
analyzed transformations compared with the
exhaustive approach without considering a feature
model. CHECKCONFIGMX may help developers to reduce
compilation effort to evaluate fine-grained
transformations applied to configurable systems with
\#ifdefs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Pereira:2017:FBP,
author = "Juliana Alves Pereira and Pawel Matuszyk and Sebastian
Krieter and Myra Spiliopoulou and Gunter Saake",
title = "A feature-based personalized recommender system for
product-line configuration",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "120--131",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993249",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today's competitive marketplace requires the industry
to understand unique and particular needs of their
customers. Product line practices enable companies to
create individual products for every customer by
providing an interdependent set of features. Users
configure personalized products by consecutively
selecting desired features based on their individual
needs. However, as most features are interdependent,
users must understand the impact of their gradual
selections in order to make valid decisions. Thus,
especially when dealing with large feature models,
specialized assistance is needed to guide the users in
configuring their product. Recently, recommender
systems have proved to be an appropriate means to assist
users in finding information and making decisions. In
this paper, we propose an advanced feature recommender
system that provides personalized recommendations to
users. In detail, we offer four main contributions: (i)
We provide a recommender system that suggests relevant
features to ease the decision-making process. (ii)
Based on this system, we provide visual support to
users that guides them through the decision-making
process and allows them to focus on valid and relevant
parts of the configuration space. (iii) We provide an
interactive open-source configurator tool encompassing
all those features. (iv) In order to demonstrate the
performance of our approach, we compare three different
recommender algorithms in two real case studies derived
from business experience.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Kowal:2017:EAF,
author = "Matthias Kowal and Sofia Ananieva and Thomas
Th{\"u}m",
title = "Explaining anomalies in feature models",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "132--143",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993248",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The development of variable software, in general, and
feature models, in particular, is an error-prone and
time-consuming task. It gets increasingly more
challenging with industrial-size models containing
hundreds or thousands of features and constraints. Each
change may lead to anomalies in the feature model such
as making some features impossible to select. While the
detection of anomalies is well-researched, giving
explanations is still a challenge. Explanations must be
as accurate and understandable as possible to support
the developer in repairing the source of an error. We
propose an efficient and generic algorithm for
explaining different anomalies in feature models.
Additionally, we achieve a benefit for the developer by
computing short explanations expressed in a
user-friendly manner and by emphasizing specific parts
in explanations that are more likely to be the cause of
an anomaly. We provide an open-source implementation in
FeatureIDE and show its scalability for industrial-size
feature models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Al-Hajjaji:2017:IEP,
author = "Mustafa Al-Hajjaji and Sebastian Krieter and Thomas
Th{\"u}m and Malte Lochau and Gunter Saake",
title = "{IncLing}: efficient product-line testing using
incremental pairwise sampling",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "144--155",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993253",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A software product line comprises a family of software
products that share a common set of features. It
enables customers to compose software systems from a
managed set of features. Testing every product of a
product line individually is often infeasible due to
the exponential number of possible products in the
number of features. Several approaches have been
proposed to restrict the number of products to be
tested by sampling a subset of products achieving
sufficient combinatorial interaction coverage. However,
existing sampling algorithms do not scale well to large
product lines, as they require a considerable amount of
time to generate the samples. Moreover, samples are not
available until a sampling algorithm completely
terminates. As testing time is usually limited, we
propose an incremental approach of product sampling for
pairwise interaction testing (called IncLing), which
enables developers to generate samples on demand in a
step-wise manner. Furthermore, IncLing uses heuristics
to efficiently achieve pairwise interaction coverage
with a reasonable number of products. We evaluated
IncLing by comparing it against existing sampling
algorithms using feature models of different sizes. The
results of our approach indicate efficiency
improvements for product-line testing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Rothberg:2017:TSC,
author = "Valentin Rothberg and Christian Dietrich and Andreas
Ziegler and Daniel Lohmann",
title = "Towards scalable configuration testing in variable
software",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "156--167",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993252",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Testing a software product line such as Linux implies
building the source with different configurations.
Manual approaches to generate configurations that
enable code of interest are doomed to fail due to the
large number of variation points distributed over the
feature model, the build system and the source code.
Research has proposed various approaches to generate
covering configurations, but the algorithms show many
drawbacks related to run-time, exhaustiveness, and the
number of generated configurations. Hence, analyzing an
entire Linux source can yield more than 30 thousand
configurations and thereby exceeds the limited budget
and resources for build testing. In this paper, we
present an approach to fill the gap between a
systematic generation of configurations and the
necessity to fully build software in order to test it.
By merging previously generated configurations, we
reduce the number of necessary builds and enable global
variability-aware testing. We reduce the problem of
merging configurations to finding maximum cliques in a
graph. We evaluate the approach on the Linux kernel,
compare the results to common practices in industry,
and show that our implementation scales even when
facing graphs with millions of edges.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Steindorfer:2017:TSP,
author = "Michael J. Steindorfer and Jurgen J. Vinju",
title = "Towards a software product line of trie-based
collections",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "168--172",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993251",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Collection data structures in standard libraries of
programming languages are designed to excel for the
average case by carefully balancing memory footprint
and runtime performance. These implicit design
decisions and hard-coded trade-offs constrain users
from using an optimal variant for a given problem.
Although a wide range of specialized collections is
available for the Java Virtual Machine (JVM), they
introduce yet another dependency and complicate user
adoption by requiring specific Application Program
Interfaces (APIs) incompatible with the standard
library. A product line for collection data structures
would relieve library designers from optimizing for the
general case. Furthermore, a product line allows
evolving the potentially large code base of a
collection family efficiently. The challenge is to find
a small core framework for collection data structures
which covers all variations without exhaustively
listing them, while supporting good performance at the
same time. We claim that the concept of Array Mapped
Tries (AMTs) embodies a high degree of commonality in
the sub-domain of immutable collection data structures.
AMTs are flexible enough to cover most of the
variability, while minimizing code bloat in the
generator and the generated code. We implemented a Data
Structure Code Generator (DSCG) that emits immutable
collections based on an AMT skeleton foundation. The
generated data structures outperform competitive
hand-optimized implementations, and the generator still
allows for customization towards specific workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Al-Hajjaji:2017:TDT,
author = "Mustafa Al-Hajjaji and Jens Meinicke and Sebastian
Krieter and Reimar Schr{\"o}ter and Thomas Th{\"u}m and
Thomas Leich and Gunter Saake",
title = "Tool demo: testing configurable systems with
{FeatureIDE}",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "173--177",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993254",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most software systems are designed to provide custom
functionality using configuration options. Testing such
systems is challenging as running tests of a single
configuration is often not sufficient, because defects
may appear in other configurations. Ideally, all
configurations of a software system should be tested,
which is usually not feasible in practice due to the
combinatorial explosion with respect to the
configuration options. Multiple sampling strategies aim
to reduce the set of tested configurations to a
feasible amount, such as T-wise sampling, random
configurations, and user-defined configurations.
However, these strategies are often not applied in
practice as they require manual effort or a specialized
testing framework. Within our tool FeatureIDE, we
integrate all aforementioned strategies and reduce the
manual effort by automating the process of generating
and testing configurations. Furthermore, we provide
support for unit testing to avoid redundant test
executions and for variability-aware testing. With this
extension of FeatureIDE, we aim to make recent testing
techniques for configurable systems applicable in
practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Makki:2017:ART,
author = "Majid Makki and Dimitri {Van Landuyt} and Wouter
Joosen",
title = "Automated regression testing of {BPMN 2.0} processes:
a capture and replay framework for continuous
delivery",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "178--189",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993257",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Regression testing is a form of software quality
assurance (QA) that involves comparing the behavior of
a newer version of a software artifact to its earlier
correct behavior, and signaling the QA engineer when
deviations are detected. Given the large potential in
automated generation and execution of regression test
cases for business process models in the context of
running systems, powerful tools are required to make
this practically feasible, more specifically to limit
the potential impact on production systems, and to
reduce the manual effort required from QA engineers. In
this paper, we present a regression testing automation
framework that implements the capture {\&} replay
paradigm in the context of BPMN 2.0, a domain-specific
language for modeling and executing business processes.
The framework employs parallelization techniques and
efficient communication patterns to reduce the
performance overhead of capturing. Based on inputs from
the QA engineer, it manipulates the BPMN2 model before
executing tests for isolating the latter from external
dependencies (e.g. human actors or expensive web
services) and for avoiding undesired side-effects.
Finally, it performs a regression detection algorithm
and reports the results to the QA engineer. We have
implemented our framework on top of a BPMN2-compliant
execution engine, namely jBPM, and performed functional
validations and evaluations of its performance and
fault-tolerance. The results, indicating 3.9\% average
capturing performance overhead, demonstrate that the
implemented framework can be the foundation of a
practical regression testing tool for BPMN 2.0, and a
key enabler for continuous delivery of business
process-driven applications and services.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Hammer:2017:VOV,
author = "Matthew A. Hammer and Bor-Yuh Evan Chang and David
{Van Horn}",
title = "A vision for online verification-validation",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "190--201",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993255",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today's programmers face a false choice between
creating software that is extensible and software that
is correct. Specifically, dynamic languages permit
software that is richly extensible (via dynamic code
loading, dynamic object extension, and various forms of
reflection), and today's programmers exploit this
flexibility to ``bring their own language features'' to
enrich extensible languages (e.g., by using common
JavaScript libraries). Meanwhile, such library-based
language extensions generally lack enforcement of their
abstractions, leading to programming errors that are
complex to avoid and predict. To offer verification for
this extensible world, we propose online
verification-validation (OVV), which consists of
language and VM design that enables a ``phaseless''
approach to program analysis, in contrast to the
standard static-dynamic phase distinction. Phaseless
analysis freely interleaves abstract interpretation with
concrete execution, allowing analyses to use dynamic
(concrete) information to prove universal (abstract)
properties about future execution. In this paper, we
present a conceptual overview of OVV through a
motivating example program that uses a hypothetical
database library. We present a generic semantics for
OVV, and an extension to this semantics that offers a
simple gradual type system for the database library
primitives. The result of instantiating this gradual
type system in an OVV setting is a checker that can
progressively type successive continuations of the
program until a continuation is fully verified. To
evaluate the proposed vision of OVV for this example,
we implement the VM semantics (in Rust), and show that
this design permits progressive typing in this
manner.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Boussaa:2017:ANF,
author = "Mohamed Boussaa and Olivier Barais and Benoit Baudry
and Gerson Suny{\'e}",
title = "Automatic non-functional testing of code generators
families",
journal = j-SIGPLAN,
volume = "52",
number = "3",
pages = "202--212",
month = mar,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093335.2993256",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The intensive use of generative programming techniques
provides an elegant engineering solution to deal with
the heterogeneity of platforms and technological
stacks. The use of domain-specific languages for
example, leads to the creation of numerous code
generators that automatically translate high-level
system specifications into multi-target executable
code. Producing correct and efficient code generators is
complex and error-prone. Although software designers
generally provide high-level test suites to verify the
functional outcome of generated code, it remains
challenging and tedious to verify the behavior of
produced code in terms of non-functional properties.
This paper describes a practical approach based on a
runtime monitoring infrastructure to automatically
detect potentially inefficient code generators. This
infrastructure, based on system containers as execution
platforms, allows code-generator developers to evaluate
the generated code performance. We evaluate our
approach by analyzing the performance of Haxe, a
popular high-level programming language that involves a
set of cross-platform code generators. Experimental
results show that our approach is able to detect some
performance inconsistencies that reveal real issues in
Haxe code generators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '16 conference proceedings.",
}
@Article{Abadi:2016:TLF,
author = "Mart{\'\i}n Abadi",
title = "{TensorFlow}: learning functions at scale",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "1--1",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2976746",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "TensorFlow is a machine learning system that operates
at large scale and in heterogeneous environments. Its
computational model is based on dataflow graphs with
mutable state. Graph nodes may be mapped to different
machines in a cluster, and within each machine to CPUs,
GPUs, and other devices. TensorFlow supports a variety
of applications, but it particularly targets training
and inference with deep neural networks. It serves as a
platform for research and for deploying machine
learning systems across many areas, such as speech
recognition, computer vision, robotics, information
retrieval, and natural language processing. In this
talk, we describe TensorFlow and outline some of its
applications. We also discuss the question of what
TensorFlow and deep learning may have to do with
functional programming. Although TensorFlow is not
purely functional, many of its uses are concerned with
optimizing functions (during training), then with
applying those functions (during inference). These
functions are defined as compositions of simple
primitives (as is common in functional programming),
with internal data representations that are learned
rather than manually designed. TensorFlow is joint work
with many other people in the Google Brain team and
elsewhere. More information is available at
tensorflow.org.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Ryu:2016:JFB,
author = "Sukyoung Ryu",
title = "Journey to find bugs in {JavaScript} web applications
in the wild",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "2--2",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2976747",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Analyzing real-world JavaScript web applications is a
challenging task. On top of understanding the semantics
of JavaScript, it requires modeling of web documents,
platform objects, and interactions between them. Not
only the JavaScript language itself but also its usage
patterns are extremely dynamic. JavaScript can generate
code and run it during evaluation, and most web
applications load JavaScript code dynamically. Such
dynamic characteristics of JavaScript web applications
make pure static analysis approaches inapplicable. In
this talk, we present our attempts to analyze
JavaScript web applications in the wild mostly
statically using various approaches. From pure
JavaScript programs to JavaScript web applications
using platform-specific libraries and dynamic code
loading, we explain technical challenges in analyzing
each of them and how we built an open-source analysis
framework for JavaScript, SAFE, that addresses the
challenges incrementally. In spite of active research
accomplishments in analysis of JavaScript web
applications, many issues still remain to be resolved
such as events, callback functions, and hybrid web
applications. We discuss possible future research
directions and open challenges.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Licata:2016:FPG,
author = "Dan Licata",
title = "A functional programmer's guide to homotopy type
theory",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "3--3",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2976748",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dependent type theories are functional programming
languages with types rich enough to do computer-checked
mathematics and software verification. Homotopy type
theory is a recent area of work that connects dependent
type theory to the mathematical disciplines of homotopy
theory and higher-dimensional category theory. From a
programming point of view, these connections have
revealed that all types in dependent type theory
support a certain generic program that had not
previously been exploited. Specifically, each type can
be equipped with computationally relevant witnesses of
equality of elements of that type, and all types
support a generic program that transports elements
along these equalities. One mechanism for equipping
types with non-trivial witnesses of equality is
Voevodsky's univalence axiom, which implies that
equality of types themselves is witnessed by type
isomorphism. Another is higher inductive types, an
extended datatype schema that allows identifications
between different datatype constructors. While these
new mechanisms were originally formulated as axiomatic
extensions of type theory, recent work has investigated
their computational meaning, leading to the development
of new programming languages that better support them.
In this talk, I will illustrate what univalence and
higher inductive types mean in programming terms. I
will also discuss how studying some related semantic
settings can reveal additional structure on types; for
example, moving from groupoids (categories where all
maps are invertible) to general categories yields an
account of coercions instead of equalities. Overall, I
hope to convey some of the beauty and richness of these
connections between disciplines, which we are just
beginning to understand.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
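%%% The talk's central programming idea is that every type supports a
%%% generic program transporting elements along equality witnesses. A
%%% minimal Haskell sketch of that idea (ours, for orientation only;
%%% the names ":~:" and "transport" follow common usage, not the talk):
%%%
%%% {-# LANGUAGE GADTs, TypeOperators #-}
%%%
%%% -- propositional equality of types, with its sole witness Refl
%%% data a :~: b where
%%%   Refl :: a :~: a
%%%
%%% -- the generic "transport" program: move a value along an equality
%%% transport :: a :~: b -> a -> b
%%% transport Refl x = x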
@Article{Castro:2016:FPS,
author = "David Castro and Kevin Hammond and Susmit Sarkar",
title = "Farms, pipes, streams and reforestation: reasoning
about structured parallel processes using types and
hylomorphisms",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "4--17",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951920",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The increasing importance of parallelism has motivated
the creation of better abstractions for writing
parallel software, including structured parallelism
using nested algorithmic skeletons. Such approaches
provide high-level abstractions that avoid common
problems, such as race conditions, and often allow
strong cost models to be defined. However, choosing a
combination of algorithmic skeletons that yields good
parallel speedups for a program on some specific
parallel architecture remains a difficult task. In
order to achieve this, it is necessary to
simultaneously reason both about the costs of different
parallel structures and about the semantic equivalences
between them. This paper presents a new type-based
mechanism that enables strong static reasoning about
these properties. We exploit well-known properties of a
very general recursion pattern, hylomorphisms, and give
a denotational semantics for structured parallel
processes in terms of these hylomorphisms. Using our
approach, it is possible to determine formally whether
it is possible to introduce a desired parallel
structure into a program without altering its
functional behaviour, and also to choose a version of
that parallel structure that minimises some given cost
model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
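%%% The paper's denotational treatment of farms and pipelines rests on
%%% hylomorphisms. A minimal Haskell rendering of that recursion scheme
%%% (ours; "ListF" and "sumSquares" are illustrative names, not the
%%% paper's code):
%%%
%%% {-# LANGUAGE DeriveFunctor #-}
%%%
%%% -- unfold with coalg, then fold with alg, in one pass
%%% hylo :: Functor f => (f b -> b) -> (a -> f a) -> a -> b
%%% hylo alg coalg = alg . fmap (hylo alg coalg) . coalg
%%%
%%% -- the base functor of lists, and a small example instance
%%% data ListF e r = NilF | ConsF e r deriving Functor
%%%
%%% sumSquares :: Integer -> Integer
%%% sumSquares = hylo alg coalg
%%%   where
%%%     coalg 0 = NilF
%%%     coalg n = ConsF (n * n) (n - 1)
%%%     alg NilF        = 0
%%%     alg (ConsF e r) = e + r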
@Article{Acar:2016:DCC,
author = "Umut A. Acar and Arthur Chargu{\'e}raud and Mike
Rainey and Filip Sieczkowski",
title = "Dag-calculus: a calculus for parallel computation",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "18--32",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951946",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Increasing availability of multicore systems has led
to greater focus on the design and implementation of
languages for writing parallel programs. Such languages
support various abstractions for parallelism, such as
fork-join, async-finish, and futures. While they may seem
similar, these abstractions lead to different
semantics, language design and implementation
decisions, and can significantly impact the performance
of end-user applications. In this paper, we consider
the question of whether it would be possible to unify
various paradigms of parallel computing. To this end,
we propose a calculus, called dag calculus, that can
encode fork-join, async-finish, and futures, and
possibly others. We describe dag calculus and its
semantics, establish translations from the
aforementioned paradigms into dag calculus. These
translations establish that dag calculus is
sufficiently powerful for encoding programs written in
prevailing paradigms of parallelism. We present
concurrent algorithms and data structures for realizing
dag calculus on multicore hardware and prove that the
proposed techniques are consistent with the semantics.
Finally, we present an implementation of the calculus
and evaluate it empirically by comparing its
performance to highly optimized code from prior work.
The results show that the calculus is expressive and
that it competes well with, and sometimes outperforms,
the state of the art.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
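%%% Of the paradigms the dag calculus encodes, fork-join is the
%%% simplest to sketch in Haskell. A hedged illustration using the
%%% "parallel" package (our example, not the paper's artifact):
%%%
%%% import Control.Parallel (par, pseq)
%%%
%%% -- fork the first recursive call, force the second, then join
%%% pfib :: Int -> Integer
%%% pfib n
%%%   | n < 2     = fromIntegral n
%%%   | otherwise = x `par` (y `pseq` (x + y))
%%%   where
%%%     x = pfib (n - 1)
%%%     y = pfib (n - 2)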
@Article{Borgstrom:2016:LCF,
author = "Johannes Borgstr{\"o}m and Ugo {Dal Lago} and Andrew
D. Gordon and Marcin Szymczak",
title = "A lambda-calculus foundation for universal
probabilistic programming",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "33--46",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951942",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We develop the operational semantics of an untyped
probabilistic \lambda -calculus with continuous
distributions, and both hard and soft constraints, as a
foundation for universal probabilistic programming
languages such as Church, Anglican, and Venture. Our
first contribution is to adapt the classic operational
semantics of \lambda -calculus to a continuous setting
via creating a measure space on terms and defining
step-indexed approximations. We prove equivalence of
big-step and small-step formulations of this
distribution-based semantics. To move closer to
inference techniques, we also define the sampling-based
semantics of a term as a function from a trace of
random samples to a value. We show that the
distribution induced by integration over the space of
traces equals the distribution-based semantics. Our
second contribution is to formalize the implementation
technique of trace Markov chain Monte Carlo (MCMC) for
our calculus and to show its correctness. A key step is
defining sufficient conditions for the distribution
induced by trace MCMC to converge to the
distribution-based semantics. To the best of our
knowledge, this is the first rigorous correctness proof
for trace MCMC for a higher-order functional language,
or for a language with soft constraints.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Ismail:2016:DPD,
author = "Wazim Mohammed Ismail and Chung-chieh Shan",
title = "Deriving a probability density calculator (functional
pearl)",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "47--59",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951922",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Given an expression that denotes a probability
distribution, often we want a corresponding density
function, to use in probabilistic inference.
Fortunately, the task of finding a density has been
automated. It turns out that we can derive a
compositional procedure for finding a density, by
equational reasoning about integrals, starting with the
mathematical specification of what a density is.
Moreover, the density found can be run as an estimation
algorithm, as well as simplified as an exact formula to
improve the estimate.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
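%%% The density calculator rests on the change-of-variables rule. A
%%% one-case sketch (ours, with hypothetical names): for Y = a*U + b
%%% with U ~ Uniform(0,1) and a /= 0, the derived density divides by
%%% the Jacobian |a|:
%%%
%%% densityAffine :: Double -> Double -> Double -> Double
%%% densityAffine a b y
%%%   | 0 <= x && x <= 1 = 1 / abs a
%%%   | otherwise        = 0
%%%   where x = (y - b) / a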
@Article{Tan:2016:NVC,
author = "Yong Kiam Tan and Magnus O. Myreen and Ramana Kumar
and Anthony Fox and Scott Owens and Michael Norrish",
title = "A new verified compiler backend for {CakeML}",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "60--73",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951924",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We have developed and mechanically verified a new
compiler backend for CakeML. Our new compiler features
a sequence of intermediate languages that allows it to
incrementally compile away high-level features and
enables verification at the right levels of semantic
detail. In this way, it resembles mainstream
(unverified) compilers for strict functional languages.
The compiler supports efficient curried multi-argument
functions, configurable data representations,
exceptions that unwind the call stack, register
allocation, and more. The compiler targets several
architectures: x86-64, ARMv6, ARMv8, MIPS-64, and
RISC-V. In this paper, we present the overall structure
of the compiler, including its 12 intermediate
languages, and explain how everything fits together. We
focus particularly on the interaction between the
verification of the register allocator and the garbage
collector, and memory representations. The entire
development has been carried out within the HOL4
theorem prover.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Downen:2016:SCC,
author = "Paul Downen and Luke Maurer and Zena M. Ariola and
Simon Peyton Jones",
title = "Sequent calculus as a compiler intermediate language",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "74--88",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951931",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The $ \lambda $ -calculus is popular as an
intermediate language for practical compilers. But in
the world of logic it has a lesser-known twin, born at
the same time, called the sequent calculus. Perhaps
that would make for a good intermediate language, too?
To explore this question we designed Sequent Core, a
practically-oriented core calculus based on the sequent
calculus, and used it to re-implement a substantial
chunk of the Glasgow Haskell Compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
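%%% A sketch of what a sequent-calculus core looks like as data, in
%%% the style of Curien and Herbelin's mu/mu-tilde calculus, a close
%%% relative of the paper's Sequent Core (our rendering, not the GHC
%%% implementation):
%%%
%%% type Name = String
%%%
%%% data Term                    -- producers
%%%   = Var Name
%%%   | Lam Name Term            -- \x. t
%%%   | Mu Name Command          -- mu k. c: captures its continuation
%%%
%%% data Cont                    -- consumers (evaluation contexts)
%%%   = CoVar Name
%%%   | App Term Cont            -- t . k: a call-stack frame
%%%   | MuTilde Name Command     -- mu~ x. c: a let-binding consumer
%%%
%%% data Command = Cut Term Cont -- <t || k>: run t in context k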
@Article{OConnor:2016:RTR,
author = "Liam O'Connor and Zilin Chen and Christine Rizkallah
and Sidney Amani and Japheth Lim and Toby Murray and
Yutaka Nagashima and Thomas Sewell and Gerwin Klein",
title = "Refinement through restraint: bringing down the cost
of verification",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "89--102",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951940",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a framework aimed at significantly reducing
the cost of verifying certain classes of systems
software, such as file systems. Our framework allows
for equational reasoning about systems code written in
our new language, Cogent. Cogent is a restricted,
polymorphic, higher-order, and purely functional
language with linear types and without the need for a
trusted runtime or garbage collector. Linear types
allow us to assign two semantics to the language: one
imperative, suitable for efficient C code generation;
and one functional, suitable for equational reasoning
and verification. As Cogent is a restricted language,
it is designed to easily interoperate with existing C
functions and to connect to existing C verification
frameworks. Our framework is based on certifying
compilation: For a well-typed Cogent program, our
compiler produces C code, a high-level shallow
embedding of its semantics in Isabelle/HOL, and a proof
that the C code correctly refines this embedding. Thus
one can reason about the full semantics of real-world
systems code productively and equationally, while
retaining the interoperability and leanness of C. The
compiler certificate is a series of language-level
proofs and per-program translation validation phases,
combined into one coherent top-level theorem in
Isabelle/HOL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{New:2016:FAC,
author = "Max S. New and William J. Bowman and Amal Ahmed",
title = "Fully abstract compilation via universal embedding",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "103--116",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951941",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A fully abstract compiler guarantees that two source
components are observationally equivalent in the source
language if and only if their translations are
observationally equivalent in the target. Full
abstraction implies the translation is secure:
target-language attackers can make no more observations
of a compiled component than a source-language attacker
interacting with the original source component. Proving
full abstraction for realistic compilers is challenging
because realistic target languages contain features
(such as control effects) unavailable in the source,
while proofs of full abstraction require showing that
every target context to which a compiled component may
be linked can be back-translated to a behaviorally
equivalent source context. We prove the first full
abstraction result for a translation whose target
language contains exceptions, but the source does not.
Our translation---specifically, closure conversion of
simply typed $ \lambda $-calculus with recursive
types---uses types at the target level to ensure that a
compiled component is never linked with attackers that
have more distinguishing power than source-level
attackers. We present a new back-translation technique
based on a shallow embedding of the target language
into the source language at a dynamic type. Then
boundaries are inserted that mediate terms between the
untyped embedding and the strongly-typed source. This
technique allows back-translating non-terminating
programs, target features that are untypeable in the
source, and well-bracketed effects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Dimoulas:2016:OLP,
author = "Christos Dimoulas and Max S. New and Robert Bruce
Findler and Matthias Felleisen",
title = "{Oh Lord}, please don't let contracts be misunderstood
(functional pearl)",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "117--131",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951930",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Contracts feel misunderstood, especially those with a
higher-order soul. While software engineers appreciate
contracts as tools for articulating the interface
between components, functional programmers desperately
search for their types and meaning, completely
forgetting about their pragmatics. This gem presents a
novel analysis of contract systems. Applied to the
higher-order kind, this analysis reveals their large
and clearly unappreciated software engineering
potential. Three sample applications illustrate where
this kind of exploration may lead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
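%%% For orientation, the classic higher-order contract construction
%%% (Findler and Felleisen) that this pearl re-examines can be
%%% sketched in Haskell as projections, with blame omitted:
%%%
%%% type Contract a = a -> a
%%%
%%% flat :: (a -> Bool) -> Contract a
%%% flat p x = if p x then x else error "contract violation"
%%%
%%% -- a function contract checks the domain on the way in and the
%%% -- range on the way out
%%% func :: Contract a -> Contract b -> Contract (a -> b)
%%% func dom rng f = rng . f . dom
%%%
%%% posToPos :: Contract (Int -> Int)
%%% posToPos = func (flat (> 0)) (flat (> 0))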
@Article{Cicek:2016:TTI,
author = "Ezgi {\c{C}}i{\c{c}}ek and Zoe Paraskevopoulou and
Deepak Garg",
title = "A type theory for incremental computational complexity
with control flow changes",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "132--145",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951950",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Incremental computation aims to speed up re-runs of a
program after its inputs have been modified slightly.
It works by recording a trace of the program's first
run and propagating changes through the trace in
incremental runs, trying to re-use as much of the
original trace as possible. The recent work CostIt is a
type and effect system to establish the time complexity
of incremental runs of a program, as a function of
input changes. However, CostIt is limited in two ways.
First, it prohibits input changes that influence
control flow. This makes it impossible to type programs
that, for instance, branch on inputs that may change.
Second, the soundness of CostIt is proved relative to
an abstract cost semantics, but it is unclear how the
semantics can be realized. In this paper, we address
both these limitations. We present DuCostIt, a
re-design of CostIt, that combines reasoning about
costs of change propagation and costs of from-scratch
evaluation. The latter lifts the restriction on control
flow changes. To obtain the type system, we refine Flow
Caml, a type system for information flow analysis, with
cost effects. Additionally, we inherit from CostIt
index refinements to track data structure sizes and a
co-monadic type. Using a combination of binary and
unary step-indexed logical relations, we prove
DuCostIt's cost analysis sound relative to not only an
abstract cost semantics, but also a concrete semantics,
which is obtained by translation to an ML-like
language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Takeda:2016:CBE,
author = "Kotaro Takeda and Naoki Kobayashi and Kazuya Yaguchi
and Ayumi Shinohara",
title = "Compact bit encoding schemes for simply-typed
lambda-terms",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "146--157",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951918",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We consider the problem of how to compactly encode
simply-typed \lambda -terms into bit strings. The work
has been motivated by Kobayashi et al.'s recent work on
higher-order data compression, where data are encoded
as functional programs (or, \lambda -terms) that
generate them. To exploit its good compression power,
the compression scheme has to come with a method for
compactly encoding the \lambda -terms into bit strings.
To this end, we propose two type-based bit-encoding
schemes; the first one encodes a \lambda -term into a
sequence of symbols by using type information, and then
applies arithmetic coding to convert the sequence to a
bit string. The second one is more sophisticated; we
prepare a context-free grammar (CFG) that describes
only well-typed terms, and then use a variation of
arithmetic coding specialized for the CFG. We have
implemented both schemes and confirmed that they often
output more compact codes than previous bit encoding
schemes for \lambda -terms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
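%%% For contrast with the paper's type-based schemes, the standard
%%% type-agnostic baseline (Tromp's binary lambda calculus encoding of
%%% de Bruijn terms) is easy to state in Haskell (our sketch):
%%%
%%% data Term = Var Int | Lam Term | App Term Term
%%%
%%% -- Lam = 00, App = 01, Var i = 1^(i+1) 0 (prefix-free)
%%% encode :: Term -> [Bool]
%%% encode (Lam b)   = [False, False] ++ encode b
%%% encode (App f a) = [False, True]  ++ encode f ++ encode a
%%% encode (Var i)   = replicate (i + 1) True ++ [False]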
@Article{Mu:2016:QGO,
author = "Shin-Cheng Mu and Yu-Hsi Chiang and Yu-Han Lyu",
title = "Queueing and glueing for optimal partitioning
(functional pearl)",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "158--167",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951923",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The queueing-glueing algorithm is the nickname we give
to an algorithmic pattern that provides amortised
linear time solutions to a number of optimal list
partition problems that have a peculiar property: at
various moments we know that two of three candidate
solutions could be optimal. The algorithm works by
keeping a queue of lists, glueing them from one end,
while chopping from the other end, hence the name. We
give a formal derivation of the algorithm, and
demonstrate it with several non-trivial examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Christiansen:2016:ASP,
author = "Jan Christiansen and Nikita Danilenko and Sandra
Dylus",
title = "All sorts of permutations (functional pearl)",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "168--179",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951949",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The combination of non-determinism and sorting is
mostly associated with permutation sort, a sorting
algorithm that is not very useful for sorting and has
an awful running time. In this paper we look at the
combination of non-determinism and sorting in a
different light: given a sorting function, we apply it
to a non-deterministic predicate to obtain a function
that enumerates permutations of the input list. We get
to the bottom of necessary properties of the sorting
algorithms and predicates in play as well as discuss
variations of the modelled non-determinism. On top of
that, we formulate and prove a theorem stating that no
matter which sorting function we use, the corresponding
permutation function enumerates all permutations of the
input list. We use free theorems, which are derived
from the type of a function alone, to prove the
statement.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
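%%% The pearl's construction is concrete enough to sketch: insertion
%%% sort over a monadic comparison, instantiated at the list monad
%%% with a predicate that answers both True and False, enumerates
%%% permutations (our transcription of the idea, not the paper's
%%% exact code):
%%%
%%% import Control.Monad (foldM)
%%%
%%% insertM :: Monad m => (a -> a -> m Bool) -> a -> [a] -> m [a]
%%% insertM _ x [] = return [x]
%%% insertM p x (y:ys) = do
%%%   b <- p x y
%%%   if b then return (x : y : ys)
%%%        else (y :) <$> insertM p x ys
%%%
%%% sortM :: Monad m => (a -> a -> m Bool) -> [a] -> m [a]
%%% sortM p = foldM (flip (insertM p)) []
%%%
%%% perms :: [a] -> [[a]]
%%% perms = sortM (\_ _ -> [True, False])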
@Article{Serrano:2016:GH,
author = "Manuel Serrano and Vincent Prunet",
title = "A glimpse of {Hopjs}",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "180--192",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951916",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hop.js is a multitier programming environment for
JavaScript. It allows a single JavaScript program to
describe the client-side and the server-side components
of a web application. Its runtime environment ensures
consistent executions of the application on the server
and on the client. This paper overviews the Hop.js
design. It shows the JavaScript extensions that make
it possible to conceive web applications globally. It
presents how Hop.js interacts with the outside world
and briefly describes the Hop.js implementation: the
web server, the handling of server-side parallelism,
and the JavaScript and HTML compilers.
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Sergey:2016:ERG,
author = "Ilya Sergey",
title = "Experience report: growing and shrinking polygons for
random testing of computational geometry algorithms",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "193--199",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951927",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper documents our experience of adapting and
using the QuickCheck-style approach for extensive
randomised property-based testing of computational
geometry algorithms. The need for rigorous evaluation of
computational geometry procedures has naturally arisen
in our quest to organise a medium-size programming
contest for second-year university students---an
experiment we conducted as an attempt to introduce them
to computational geometry. The main effort in
organising the event was implementation of a solid
infrastructure for testing and ranking solutions. For
this, we employed functional programming techniques.
The choice of the language and the paradigm made it
possible for us to engineer, from scratch and in a very
short period of time, a series of robust geometric
primitives and algorithms, as well as implement a
scalable framework for their randomised testing. We
describe the main insights, enabling efficient random
testing of geometric procedures, and report on our
experience of using the testing framework, which helped
us to detect and fix a number of issues not just in our
programming artefacts, but also in the published
algorithms we had implemented.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
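%%% The QuickCheck style the report adapts is easy to illustrate: a
%%% property quantified over random inputs, here a toy geometric
%%% invariant via the shoelace formula (our example, far simpler than
%%% the polygon generators the paper engineers):
%%%
%%% import Test.QuickCheck
%%%
%%% type Pt = (Double, Double)
%%%
%%% -- twice the signed area of a polygon (shoelace formula)
%%% area2 :: [Pt] -> Double
%%% area2 ps = sum [ x1 * y2 - x2 * y1
%%%                | ((x1, y1), (x2, y2)) <- zip ps (drop 1 ps ++ take 1 ps) ]
%%%
%%% -- scaling every vertex by k scales the area by k^2
%%% prop_scaleArea :: Double -> [Pt] -> Bool
%%% prop_scaleArea k ps =
%%%   abs (area2 [ (k * x, k * y) | (x, y) <- ps ] - k * k * area2 ps)
%%%     <= 1e-6 * (1 + abs (k * k * area2 ps))
%%%
%%% main :: IO ()
%%% main = quickCheck prop_scaleArea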
@Article{Emoto:2016:TLV,
author = "Kento Emoto and Kiminori Matsuzaki and Zhenjiang Hu
and Akimasa Morihata and Hideya Iwasaki",
title = "Think like a vertex, behave like a function! {A}
functional {DSL} for vertex-centric big graph
processing",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "200--213",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951938",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The vertex-centric programming model, known as ``think
like a vertex'', is being used more and more to support
various big graph processing methods through iterative
supersteps that execute in parallel a user-defined
vertex program over each vertex of a graph. However,
the imperative and message-passing style of existing
systems makes defining a vertex program unintuitive. In
this paper, we show that one can benefit more from
``Thinking like a vertex'' by ``Behaving like a
function'' rather than ``Acting like a procedure'' with
full use of side effects and explicit control of
message passing, state, and termination. We propose a
functional approach to vertex-centric graph processing
in which the computation at every vertex is abstracted
as a higher-order function and present Fregel, a new
domain-specific language. Fregel has clear functional
semantics, supports declarative description of vertex
computation, and can be automatically translated into
Pregel, an emerging imperative-style distributed graph
processing framework, and thereby achieve promising
performance. Experimental results for several typical
examples show the promise of this functional
approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
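%%% "Behaving like a function" is concrete: a superstep is a pure
%%% function from a vertex's state and inbound messages to its new
%%% state and outbound messages. A toy Haskell sketch (ours; Fregel's
%%% actual combinators differ):
%%%
%%% type VertexId = Int
%%%
%%% data Vertex = Vertex
%%%   { vertexId :: VertexId
%%%   , value    :: Double
%%%   , outEdges :: [VertexId]
%%%   }
%%%
%%% -- keep the minimum value seen so far and forward it: one step of
%%% -- a connected-components-style computation
%%% step :: Vertex -> [Double] -> (Vertex, [(VertexId, Double)])
%%% step v msgs = (v { value = m }, [ (w, m) | w <- outEdges v ])
%%%   where m = minimum (value v : msgs)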
@Article{Arntzenius:2016:DFD,
author = "Michael Arntzenius and Neelakantan R. Krishnaswami",
title = "{Datafun}: a functional {Datalog}",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "214--227",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951948",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Datalog may be considered either an unusually powerful
query language or a carefully limited logic programming
language. Datalog is declarative, expressive, and
optimizable, and has been applied successfully in a
wide variety of problem domains. However, most
use-cases require extending Datalog in an
application-specific manner. In this paper we define
Datafun, an analogue of Datalog supporting higher-order
functional programming. The key idea is to track
monotonicity with types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
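%%% Datafun's slogan, "Datalog = monotone functions + fixed points",
%%% can be previewed in plain Haskell: transitive closure as the least
%%% fixed point of a monotone set transformer (our sketch, without the
%%% type-based monotonicity tracking that is the paper's
%%% contribution):
%%%
%%% import qualified Data.Set as Set
%%%
%%% -- naive least fixed point of a monotone function on finite sets
%%% lfp :: Eq a => (a -> a) -> a -> a
%%% lfp f x = let x' = f x in if x' == x then x else lfp f x'
%%%
%%% trans :: Ord a => Set.Set (a, a) -> Set.Set (a, a)
%%% trans edges = lfp step edges
%%%   where
%%%     step r = r `Set.union` Set.fromList
%%%       [ (a, c) | (a, b)  <- Set.toList r
%%%                , (b', c) <- Set.toList edges
%%%                , b == b' ]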
@Article{Seidel:2016:DWS,
author = "Eric L. Seidel and Ranjit Jhala and Westley Weimer",
title = "Dynamic witnesses for static type errors (or,
ill-typed programs usually go wrong)",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "228--242",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951915",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Static type errors are a common stumbling block for
newcomers to typed functional languages. We present a
dynamic approach to explaining type errors by
generating counterexample witness inputs that
illustrate how an ill-typed program goes wrong. First,
given an ill-typed function, we symbolically execute
the body to synthesize witness values that make the
program go wrong. We prove that our procedure
synthesizes general witnesses in that if a witness is
found, then for all inhabited input types, there exist
values that can make the function go wrong. Second, we
show how to extend the above procedure to produce a
reduction graph that can be used to interactively
visualize and debug witness executions. Third, we
evaluate the coverage of our approach on two data sets
comprising over 4,500 ill-typed student programs. Our
technique is able to generate witnesses for 88\% of the
programs, and our reduction graph yields small
counterexamples for 81\% of the witnesses. Finally, we
evaluate whether our witnesses help students understand
and fix type errors, and find that students presented
with our witnesses show a greater understanding of type
errors than those presented with a standard error
message.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Watanabe:2016:ADF,
author = "Keiichi Watanabe and Ryosuke Sato and Takeshi Tsukada
and Naoki Kobayashi",
title = "Automatically disproving fair termination of
higher-order functional programs",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "243--255",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951919",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose an automated method for disproving fair
termination of higher-order functional programs, which
is complementary to Murase et al.'s recent method for
proving fair termination. A program is said to be fair
terminating if it has no infinite execution trace that
satisfies a given fairness constraint. Fair termination
is an important property because program verification
problems for arbitrary \omega -regular temporal
properties can be transformed to those of fair
termination. Our method reduces the problem of
disproving fair termination to higher-order model
checking by using predicate abstraction and CEGAR.
Given a program, we convert it to an abstract program
that generates an approximation of the (possibly
infinite) execution traces of the original program, so
that the original program has a fair infinite execution
trace if the tree generated by the abstract program
satisfies a certain property. The method is a
non-trivial extension of Kuwahara et al.'s method for
disproving plain termination.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Jung:2016:HOG,
author = "Ralf Jung and Robbert Krebbers and Lars Birkedal and
Derek Dreyer",
title = "Higher-order ghost state",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "256--269",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951943",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The development of concurrent separation logic (CSL)
has sparked a long line of work on modular verification
of sophisticated concurrent programs. Two of the most
important features supported by several existing
extensions to CSL are higher-order quantification and
custom ghost state. However, none of the logics that
support both of these features reap the full potential
of their combination. In particular, none of them
provide general support for a feature we dub
``higher-order ghost state'': the ability to store
arbitrary higher-order separation-logic predicates in
ghost variables. In this paper, we propose higher-order
ghost state as an interesting and useful extension to
CSL, which we formalize in the framework of Jung et
al.'s recently developed Iris logic. To justify its
soundness, we develop a novel algebraic structure
called CMRAs (``cameras''), which can be thought of as
``step-indexed partial commutative monoids''. Finally,
we show that Iris proofs utilizing higher-order ghost
state can be effectively formalized in Coq, and discuss
the challenges we faced in formalizing them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Cockx:2016:UEP,
author = "Jesper Cockx and Dominique Devriese and Frank
Piessens",
title = "Unifiers as equivalences: proof-relevant unification
of dependently typed data",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "270--283",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951917",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dependently typed languages such as Agda, Coq and
Idris use a syntactic first-order unification algorithm
to check definitions by dependent pattern matching.
However, these algorithms don't adequately consider the
types of the terms being unified, leading to various
unintended results. As a consequence, they require ad
hoc restrictions to preserve soundness, but this makes
them very hard to prove correct, modify, or extend.
This paper proposes a framework for reasoning formally
about unification in a dependently typed setting. In
this framework, unification rules compute not just a
unifier but also a corresponding correctness proof in
the form of an equivalence between two sets of
equations. By rephrasing the standard unification rules
in a proof-relevant manner, they are guaranteed to
preserve soundness of the theory. In addition, it
enables us to safely add new rules that can exploit the
dependencies between the types of equations. Using our
framework, we reimplemented the unification algorithm
used by Agda. As a result, we were able to replace
previous ad hoc restrictions with formally verified
unification rules, fixing a number of bugs in the
process. We are convinced this will also enable the
addition of new and interesting unification rules in
the future, without compromising soundness along the
way.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Christiansen:2016:ERE,
author = "David Christiansen and Edwin Brady",
title = "Elaborator reflection: extending {Idris} in {Idris}",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "284--297",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951932",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many programming languages and proof assistants are
defined by elaboration from a high-level language with
a great deal of implicit information to a highly
explicit core language. In many advanced languages,
these elaboration facilities contain powerful tools for
program construction, but these tools are rarely
designed to be repurposed by users. We describe
elaborator reflection, a paradigm for metaprogramming
in which the elaboration machinery is made directly
available to metaprograms, as well as a concrete
realization of elaborator reflection in Idris, a
functional language with full dependent types. We
demonstrate the applicability of Idris's reflected
elaboration framework to a number of realistic
problems, we discuss the motivation for the specific
features of its design, and we explore the broader
meaning of elaborator reflection as it can relate to
other languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Dagand:2016:PTE,
author = "Pierre-Evariste Dagand and Nicolas Tabareau and
{\'E}ric Tanter",
title = "Partial type equivalences for verified dependent
interoperability",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "298--310",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951933",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Full-spectrum dependent types promise to enable the
development of correct-by-construction software.
However, even certified software needs to interact with
simply-typed or untyped programs, be it to perform
system calls, or to use legacy libraries. Trading
static guarantees for runtime checks, the dependent
interoperability framework provides a mechanism by
which simply-typed values can safely be coerced to
dependent types and, conversely, dependently-typed
programs can defensively be exported to a simply-typed
application. In this paper, we give a semantic account
of dependent interoperability. Our presentation relies
on and is guided by a pervading notion of type
equivalence, whose importance has been emphasized in
recent work on homotopy type theory. Specifically, we
develop the notion of partial type equivalences as a
key foundation for dependent interoperability. Our
framework is developed in Coq; it is thus constructive
and verified in the strictest sense of the terms. Using
our library, users can specify domain-specific partial
equivalences between data structures. Our library then
takes care of the (sometimes, heavy) lifting that leads
to interoperable programs. It thus becomes possible, as
we shall illustrate, to internalize and hand-tune the
extraction of dependently-typed programs to
interoperable OCaml programs within Coq itself.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
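%%% The shape of a partial type equivalence is simple to sketch in
%%% Haskell: a total coercion one way, a checked partial coercion back
%%% (ours; the paper's Coq framework also carries the correctness
%%% proofs):
%%%
%%% data PartialEquiv a b = PartialEquiv
%%%   { to   :: a -> b
%%%   , from :: b -> Maybe a
%%%   }
%%%
%%% newtype Nat = Nat Integer deriving Show  -- invariant: >= 0
%%%
%%% natInteger :: PartialEquiv Nat Integer
%%% natInteger = PartialEquiv
%%%   { to   = \(Nat n) -> n
%%%   , from = \n -> if n >= 0 then Just (Nat n) else Nothing
%%%   }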
@Article{Darais:2016:CGC,
author = "David Darais and David {Van Horn}",
title = "Constructive {Galois} connections: taming the {Galois}
connection framework for mechanized metatheory",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "311--324",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951934",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Galois connections are a foundational tool for
structuring abstraction in semantics and their use lies
at the heart of the theory of abstract interpretation.
Yet, mechanization of Galois connections remains
limited to restricted modes of use, preventing their
general application in mechanized metatheory and
certified programming. This paper presents constructive
Galois connections, a variant of Galois connections
that is effective both on paper and in proof
assistants; is complete with respect to a large subset
of classical Galois connections; and enables more
general reasoning principles, including the
``calculational'' style advocated by Cousot. To design
constructive Galois connections we identify a restricted
mode of use of classical ones which is both general and
amenable to mechanization in dependently-typed
functional programming languages. Crucial to our
metatheory is the addition of monadic structure to
Galois connections to control a ``specification
effect''. Effectful calculations may reason
classically, while pure calculations have extractable
computational content. Explicitly moving between the
worlds of specification and implementation is enabled
by our metatheory. To validate our approach, we provide
two case studies in mechanizing existing proofs from
the literature: one uses calculational abstract
interpretation to design a static analyzer, the other
forms a semantic basis for gradual typing. Both
mechanized proofs closely follow their original
paper-and-pencil counterparts, employ reasoning
principles not captured by previous mechanization
approaches, support the extraction of verified
algorithms, and are novel.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
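%%% A constructive Galois connection is induced by an extraction
%%% function from concrete to abstract values. The classic parity
%%% example in miniature (our Haskell sketch; the paper mechanizes
%%% this in a dependently typed setting):
%%%
%%% data Parity = Even | Odd deriving (Eq, Show)
%%%
%%% -- extraction: the best abstraction of a single concrete value
%%% eta :: Integer -> Parity
%%% eta n = if even n then Even else Odd
%%%
%%% -- an abstract successor, sound w.r.t. eta:
%%% --   eta (n + 1) == succA (eta n)  for all n
%%% succA :: Parity -> Parity
%%% succA Even = Odd
%%% succA Odd  = Even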
@Article{Blazy:2016:AMF,
author = "Sandrine Blazy and Vincent Laporte and David
Pichardie",
title = "An abstract memory functor for verified {C} static
analyzers",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "325--337",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951937",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Abstract interpretation provides advanced techniques
to infer numerical invariants on programs. There is an
abundant literature about numerical abstract domains
that operate on scalar variables. This work deals with
lifting these techniques to a realistic C memory model.
We present an abstract memory functor that takes as
argument any standard numerical abstract domain, and
builds a memory abstract domain that finely tracks
properties about memory contents, taking into account
union types, pointer arithmetic and type casts. This
functor is implemented and verified inside the Coq
proof assistant with respect to the CompCert compiler
memory model. Using the Coq extraction mechanism, it is
fully executable and used by the Verasco C static
analyzer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{McDonell:2016:GTS,
author = "Trevor L. McDonell and Timothy A. K. Zakian and Matteo
Cimini and Ryan R. Newton",
title = "Ghostbuster: a tool for simplifying and converting
{GADTs}",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "338--350",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951914",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Generalized Algebraic Datatypes, or simply GADTs, can
encode non-trivial properties in the types of the
constructors. Once such properties are encoded in a
datatype, however, all code manipulating that datatype
must provide proof that it maintains these properties
in order to typecheck. In this paper, we take a step
towards gradualizing these obligations. We introduce a
tool, Ghostbuster, that produces simplified versions of
GADTs which elide selected type parameters, thereby
weakening the guarantees of the simplified datatype in
exchange for reducing the obligations necessary to
manipulate it. Like ornaments, these simplified
datatypes preserve the recursive structure of the
original, but unlike ornaments we focus on
information-preserving bidirectional transformations.
Ghostbuster generates type-safe conversion functions
between the original and simplified datatypes, which we
prove are the identity function when composed. We
evaluate a prototype tool for Haskell against thousands
of GADTs found on the Hackage package database,
generating simpler Haskell'98 datatypes and round-trip
conversion functions between the two.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Thibodeau:2016:ICT,
author = "David Thibodeau and Andrew Cave and Brigitte Pientka",
title = "Indexed codata types",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "351--363",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951929",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Indexed data types allow us to specify and verify many
interesting invariants about finite data in a general
purpose programming language. In this paper we
investigate the dual idea: indexed codata types, which
allow us to describe data-dependencies about infinite
data structures. Unlike finite data which is defined by
constructors, we define infinite data by observations.
Dual to pattern matching on indexed data which may
refine the type indices, we define copattern matching
on indexed codata where type indices guard observations
we can make. Our key technical contributions are
three-fold: first, we extend Levy's call-by-push-value
language with support for indexed (co)data and deep
(co)pattern matching; second, we provide a clean
foundation for dependent (co)pattern matching using
equality constraints; third, we describe a small-step
semantics using a continuation-based abstract machine,
define coverage for indexed (co)patterns, and prove
type safety. This is an important step towards building
a foundation where (co)data type definitions and
dependent types can coexist.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Oliveira:2016:DIT,
author = "Bruno C. d. S. Oliveira and Zhiyuan Shi and Jo{\~a}o
Alpuim",
title = "Disjoint intersection types",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "364--377",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951945",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dunfield showed that a simply typed core calculus with
intersection types and a merge operator is able to
capture various programming language features. While
his calculus is type-safe, it is not coherent:
different derivations for the same expression can
elaborate to expressions that evaluate to different
values. The lack of coherence is an important
disadvantage for adoption of his core calculus in
implementations of programming languages, as the
semantics of the programming language becomes
implementation-dependent. This paper presents $ \lambda_i $:
a coherent and type-safe calculus with a form of
intersection types and a merge operator. Coherence is
achieved by ensuring that intersection types are
disjoint and programs are sufficiently annotated to
avoid type ambiguity. We propose a definition of
disjointness where two types A and B are disjoint only
if a certain set of types are common supertypes of A
and B. We investigate three different variants of
$ \lambda_i $, with three variants of disjointness. In
the simplest variant, which does not allow $ \top $
types, two types are disjoint if they do not share any
common supertypes at all. The other two variants
introduce $ \top $ types and refine the notion of
disjointness to allow two types to be disjoint when
their only common supertypes are top-like. The
difference between the two variants with $ \top $ types
lies in the definition of top-like types, which affects
which types are allowed in intersections. We present a
type system that prevents intersection types that are
not disjoint, as well as algorithmic specifications to
determine whether two types are disjoint, for all three
variants.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Castagna:2016:STT,
author = "Giuseppe Castagna and Tommaso Petrucciani and Kim
Nguy{\~{\^e}}n",
title = "Set-theoretic types for polymorphic variants",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "378--391",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951928",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Polymorphic variants are a useful feature of the OCaml
language whose current definition and implementation
rely on kinding constraints to simulate a subtyping
relation via unification. This yields an awkward
formalization and results in a type system whose
behaviour is in some cases unintuitive and/or unduly
restrictive. In this work, we present an alternative
formalization of polymorphic variants, based on
set-theoretic types and subtyping, that yields a
cleaner and more streamlined system. Our formalization
is more expressive than the current one (it types more
programs while preserving type safety), it can
internalize some meta-theoretic properties, and it
removes some pathological cases of the current
implementation resulting in a more intuitive and, thus,
predictable type system. More generally, this work
shows how to add full-fledged union types to functional
languages of the ML family that usually rely on the
Hindley-Milner type system. As an aside, our system
also improves the theory of semantic subtyping, notably
by proving completeness for the type reconstruction
algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Raghunathan:2016:HMM,
author = "Ram Raghunathan and Stefan K. Muller and Umut A. Acar
and Guy Blelloch",
title = "Hierarchical memory management for parallel programs",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "392--406",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951935",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "An important feature of functional programs is that
they are parallel by default. Implementing an efficient
parallel functional language, however, is a major
challenge, in part because the high rate of allocation
and freeing associated with functional programs
requires an efficient and scalable memory manager. In
this paper, we present a technique for parallel memory
management for strict functional languages with nested
parallelism. At the highest level of abstraction, the
approach consists of a technique to organize memory as
a hierarchy of heaps, and an algorithm for performing
automatic memory reclamation by taking advantage of a
disentanglement property of parallel functional
programs. More specifically, the idea is to assign to
each parallel task its own heap in memory and organize
the heaps in a hierarchy/tree that mirrors the
hierarchy of tasks. We present a nested-parallel
calculus that specifies hierarchical heaps and prove in
this calculus a disentanglement property, which
prohibits a task from accessing objects allocated by
another task that might execute in parallel. Leveraging
the disentanglement property, we present a garbage
collection technique that can operate on any subtree in
the memory hierarchy concurrently as other tasks
(and/or other collections) proceed in parallel. We
prove the safety of this collector by formalizing it in
the context of our parallel calculus. In addition, we
describe how the proposed techniques can be implemented
on modern shared-memory machines and present a
prototype implementation as an extension to MLton, a
high-performance compiler for the Standard ML language.
Finally, we evaluate the performance of this
implementation on a number of parallel benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Gilray:2016:ACP,
author = "Thomas Gilray and Michael D. Adams and Matthew Might",
title = "Allocation characterizes polyvariance: a unified
methodology for polyvariant control-flow analysis",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "407--420",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951936",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The polyvariance of a static analysis is the degree to
which it structurally differentiates approximations of
program values. Polyvariant techniques come in a number
of different flavors that represent alternative
heuristics for managing the trade-off an analysis
strikes between precision and complexity. For example,
call sensitivity supposes that values will tend to
correlate with recent call sites, object sensitivity
supposes that values will correlate with the allocation
points of related objects, the Cartesian product
algorithm supposes correlations between the values of
arguments to the same function, and so forth. In this
paper, we describe a unified methodology for
implementing and understanding polyvariance in a
higher-order setting (i.e., for control-flow analyses).
We do this by extending the method of abstracting
abstract machines (AAM), a systematic approach to
producing an abstract interpretation of
abstract-machine semantics. AAM eliminates recursion
within a language's semantics by passing around an
explicit store, and thus places importance on the
strategy an analysis uses for allocating abstract
addresses within the abstract heap or store. We build
on AAM by showing that the design space of possible
abstract allocators exactly and uniquely corresponds to
the design space of polyvariant strategies. This allows
us to both unify and generalize polyvariance as tunings
of a single function. Changes to the behavior of this
function easily recapitulate classic styles of analysis
and produce novel variations, combinations of
techniques, and fundamentally new techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Ueno:2016:FCG,
author = "Katsuhiro Ueno and Atsushi Ohori",
title = "A fully concurrent garbage collector for functional
programs on multicore processors",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "421--433",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951944",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a concurrent garbage collection
method for functional programs running on a multicore
processor. It is a concurrent extension of our
bitmap-marking non-moving collector with Yuasa's
snapshot-at-the-beginning strategy. Our collector is
unobtrusive in the sense of the Doligez-Leroy-Gonthier
collector; the collector does not stop mutator threads,
nor does it force them to synchronize globally.
The only critical sections between a mutator and the
collector are the code to enqueue/dequeue a 32 kB
allocation segment to/from a global segment list and
the write barrier code to push an object pointer onto
the collector's stack. Most of these data structures
can be implemented as standard lock-free data
structures. This achieves both efficient allocation and
unobtrusive collection in a multicore system. The
proposed method has been implemented in SML\#, a
full-scale Standard ML compiler supporting multiple
native threads on multicore CPUs. Our benchmark tests
show drastically shorter pause times with reasonably low
overhead compared to the sequential bitmap-marking
collector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Lindley:2016:TBS,
author = "Sam Lindley and J. Garrett Morris",
title = "Talking bananas: structural recursion for session
types",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "434--447",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951921",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Session types provide static guarantees that
concurrent programs respect communication protocols. We
give a novel account of recursive session types in the
context of GV, a small concurrent extension of the
linear $ \lambda $-calculus. We extend GV with recursive
types and catamorphisms, following the initial algebra
semantics of recursion, and show that doing so
naturally gives rise to recursive session types. We
show that this principled approach to recursion
resolves long-standing problems in the treatment of
duality for recursive session types. We characterize
the expressiveness of GV concurrency by giving a CPS
translation to (non-concurrent) $ \lambda $-calculus and
proving that reduction in GV is simulated by full
reduction in $ \lambda $-calculus. This shows that GV
remains terminating in the presence of positive
recursive types, and that such arguments extend to
other extensions of GV, such as polymorphism or
non-linear types, by appeal to normalization results
for sequential $ \lambda $-calculi. We also show that GV
remains deadlock free and deterministic in the presence
of recursive types. Finally, we extend CP, a
session-typed process calculus based on linear logic,
with recursive types, and show that doing so preserves
the connection between reduction in GV and cut
elimination in CP.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Morris:2016:BBW,
author = "J. Garrett Morris",
title = "The best of both worlds: linear functional programming
without compromise",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "448--461",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951925",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a linear functional calculus with both the
safety guarantees expressible with linear types and the
rich language of combinators and composition provided
by functional programming. Unlike previous combinations
of linear typing and functional programming, we
compromise neither the linear side (for example, our
linear values are first-class citizens of the language)
nor the functional side (for example, we do not require
duplicate definitions of compositions for linear and
unrestricted functions). To do so, we must generalize
abstraction and application to encompass both linear
and unrestricted functions. We capture the typing of
the generalized constructs with a novel use of
qualified types. Our system maintains the metatheoretic
properties of the theory of qualified types, including
principal types and decidable type inference. Finally,
we give a formal basis for our claims of
expressiveness, by showing that evaluation respects
linearity, and that our language is a conservative
extension of existing functional calculi.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Thiemann:2016:CFS,
author = "Peter Thiemann and Vasco T. Vasconcelos",
title = "Context-free session types",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "462--475",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951926",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Session types describe structured communication on
heterogeneously typed channels at a high level. Their
tail-recursive structure imposes a protocol that can be
described by a regular language. The types of
transmitted values are drawn from the underlying
functional language, abstracting from the details of
serializing values of structured data types.
Context-free session types extend session types by
allowing nested protocols that are not restricted to
tail recursion. Nested protocols correspond to
deterministic context-free languages. Such protocols
are interesting in their own right, but they are
particularly suited to describe the low-level
serialization of tree-structured data in a type-safe
way. We establish the metatheory of context-free
session types, prove that they properly generalize
standard (two-party) session types, and take first
steps towards type checking by showing that type
equivalence is decidable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Gaboardi:2016:CEC,
author = "Marco Gaboardi and Shin-ya Katsumata and Dominic
Orchard and Flavien Breuvart and Tarmo Uustalu",
title = "Combining effects and coeffects via grading",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "476--489",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951939",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Effects and coeffects are two general, complementary
aspects of program behaviour. They roughly correspond
to computations which change the execution context
(effects) versus computations which make demands on the
context (coeffects). Effectful features include
partiality, non-determinism, input-output, state, and
exceptions. Coeffectful features include resource
demands, variable access, notions of linearity, and
data input requirements. The effectful or coeffectful
behaviour of a program can be captured and described
via type-based analyses, with fine-grained information
provided by monoidal effect annotations and semiring
coeffects. Recent work has proposed models for
such typed calculi in terms of graded (strong) monads
for effects and graded (monoidal) comonads for
coeffects. Effects and coeffects have been studied
separately so far, but in practice many computations
are both effectful and coeffectful, e.g., possibly
throwing exceptions but with resource requirements. To
remedy this, we introduce a new general calculus with a
combined effect-coeffect system. This can describe both
the changes and requirements that a program has on its
context, as well as interactions between these
effectful and coeffectful features of computation. The
effect-coeffect system has a denotational model in
terms of effect-graded monads and coeffect-graded
comonads where interaction is expressed via the novel
concept of graded distributive laws. This graded
semantics unifies the syntactic type theory with the
denotational model. We show that our calculus can be
instantiated to describe in a natural way various
different kinds of interaction between a program and
its evaluation context.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Pirog:2016:SDF,
author = "Maciej Pir{\'o}g and Nicolas Wu",
title = "String diagrams for free monads (functional pearl)",
journal = j-SIGPLAN,
volume = "51",
number = "9",
pages = "490--501",
month = sep,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022670.2951947",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We show how one can reason about free monads using
their universal properties rather than any concrete
implementation. We introduce a graphical,
two-dimensional calculus tailor-made to accommodate
these properties.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ICFP '16 conference proceedings.",
}
@Article{Wade:2017:AVJ,
author = "April W. Wade and Prasad A. Kulkarni and Michael R.
Jantz",
title = "{AOT} vs. {JIT}: impact of profile data on code
quality",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "1--10",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081037",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Just-in-time (JIT) compilation during program
execution and ahead-of-time (AOT) compilation during
software installation are alternate techniques used by
managed language virtual machines (VM) to generate
optimized native code while simultaneously achieving
binary code portability and high execution performance.
Profile data collected by JIT compilers at run-time can
enable profile-guided optimizations (PGO) to customize
the generated native code to different program inputs.
AOT compilation removes the speed and energy overhead
of online profile collection and dynamic compilation,
but may not be able to achieve the quality and
performance of customized native code. The goal of this
work is to investigate and quantify the implications of
the AOT compilation model on the quality of the
generated native code for current VMs. First, we
quantify the quality of native code generated by the
two compilation models for a state-of-the-art (HotSpot)
Java VM. Second, we determine how the amount of profile
data collected affects the quality of generated code.
Third, we develop a mechanism to determine the accuracy
or similarity of different profile data for a given
program run, and investigate how the accuracy of
profile data affects its ability to effectively guide
PGOs. Finally, we categorize the profile data types in
our VM and explore the contribution of each such
category to performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Taylor:2017:AOO,
author = "Ben Taylor and Vicent Sanz Marco and Zheng Wang",
title = "Adaptive optimization for {OpenCL} programs on
embedded heterogeneous systems",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "11--20",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081040",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Heterogeneous multi-core architectures consisting of
CPUs and GPUs are commonplace in today's embedded
systems. These architectures offer potential for energy
efficient computing if the application task is mapped
to the right core. Realizing such potential is
challenging due to the complex and evolving nature of
hardware and applications. This paper presents an
automatic approach to map OpenCL kernels onto
heterogeneous multi-cores for a given optimization
criterion --- whether it is faster runtime, lower
energy consumption or a trade-off between them. This is
achieved by developing a machine learning based
approach to predict which processor to use to run the
OpenCL kernel and the host program, and at what
frequency the processor should operate. Instead of
hand-tuning a model for each optimization metric, we
use machine learning to develop a unified framework
that first automatically learns the optimization
heuristic for each metric off-line, then uses the
learned knowledge to schedule OpenCL kernels at runtime
based on code and runtime information of the program.
We apply our approach to a set of representative OpenCL
benchmarks and evaluate it on an ARM big.LITTLE mobile
platform. Our approach achieves over 93\% of the
performance delivered by a perfect predictor. We
obtain, on average, 1.2x, 1.6x, and 1.8x improvements
for runtime, energy consumption, and the energy-delay
product, respectively, when compared to a comparable
heterogeneous-aware OpenCL task mapping scheme.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Reiche:2017:AVI,
author = "Oliver Reiche and Christof Kobylko and Frank Hannig
and J{\"u}rgen Teich",
title = "Auto-vectorization for image processing {DSLs}",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "21--30",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081039",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The parallelization of programs and distributing their
workloads to multiple threads can be a challenging
task. In addition to multi-threading, harnessing vector
units in CPUs proves highly desirable. However,
employing vector units to speed up programs can be
quite tedious. Either a program developer relies solely
on the auto-vectorization capabilities of the compiler,
or they manually apply vector intrinsics, which is
extremely error-prone, difficult to maintain, and not
portable at all. Based on whole-function vectorization,
a method to replace control flow with data flow, we
propose auto-vectorization techniques for image
processing DSLs in the context of source-to-source
compilation. The approach does not require the input to
be available in SSA form. Moreover, we formulate
constraints under which the vectorization analysis and
code transformations may be greatly simplified in the
context of image processing DSLs. As part of our
methodology, we present control flow to data flow
transformation as a source-to-source translation.
Moreover, we propose a method to efficiently analyze
algorithms with mixed bit-width data types to determine
the optimal SIMD width, independently of the target
instruction set. The techniques are integrated into an
open source DSL framework. Subsequently, the
vectorization capabilities are compared to a variety of
existing state-of-the-art C/C++ compilers. A geometric
mean speedup of up to 3.14 is observed for benchmarks
taken from ISPC and image processing, compared to
non-vectorized executions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Fu:2017:DTS,
author = "Sheng-Yu Fu and Ding-Yong Hong and Yu-Ping Liu and
Jan-Jan Wu and Wei-Chung Hsu",
title =        "Dynamic translation of structured loads\slash stores
and register mapping for architectures with {SIMD}
extensions",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "31--40",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081029",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "More and more modern processors have been supporting
non-contiguous SIMD data accesses. However, translating
such instructions has been overlooked in the Dynamic
Binary Translation (DBT) area. For example, in the
popular QEMU dynamic binary translator, guest memory
instructions with strides are emulated by a sequence of
scalar instructions, leaving significant room for
performance improvement when the host machines have
SIMD instructions available. Structured loads/stores,
such as VLDn/VSTn in ARM NEON, are one type of strided
SIMD data access instructions. They are widely used in
signal processing, multimedia, mathematical and 2D
matrix transposition applications. Efficient
translation of such structured loads/stores is a
critical issue when migrating ARM executables to other
ISAs. However, it is quite challenging: not only is the
translation of structured loads/stores non-trivial, but
the difference between guest and host register
configurations must also be taken into
consideration. In this work, we present the design and
implementation of translating structured loads/stores
in DBT, including target code generation as well as
efficient SIMD register mapping. Our proposed register
mapping mechanisms are not limited to handling
structured loads/stores; they can be extended to deal
with normal SIMD instructions. On a set of OpenCV
benchmarks, our QEMU-based system has achieved a
maximum speedup of 5.41x, with an average improvement
of 2.93x. On a set of BLAS benchmarks, our system has
also obtained a maximum speedup of 2.19x and an average
improvement of 1.63x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Jiang:2017:OFU,
author = "Weiwen Jiang and Edwin H.-M. Sha and Qingfeng Zhuge
and Hailiang Dong and Xianzhang Chen",
title = "Optimal functional unit assignment and voltage
selection for pipelined {MPSoC} with guaranteed
probability on time performance",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "41--50",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081036",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Pipelined heterogeneous multiprocessor system-on-chip
(MPSoC) can provide high throughput for streaming
applications. In the design of such systems, time
performance and system cost are the primary concerns.
By analyzing the runtime behavior of benchmarks on
real-world platforms, we find that the execution times
of tasks are not fixed but spread probabilistically.
Based on this observation, we model the execution times
of tasks as random variables. In this paper, we study how
to design high-performance and low-cost MPSoC systems
to execute a set of such tasks with data dependencies
in a pipelined fashion. Our objective is to obtain the
optimal functional unit assignment and voltage
selection for the pipelined MPSoC systems, such that
the system cost is minimized while timing constraints
can be met with a given guaranteed probability. For
each required probability, our proposed algorithm can
efficiently obtain the optimal solution. Experiments
show that other existing algorithms cannot find
feasible solutions in most cases, but ours can. Even
for those solutions that other algorithms can obtain,
ours can reach 30\% reductions in total cost compared
with others.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Lee:2017:IIP,
author = "Gyeongmin Lee and Seonyeong Heo and Bongjun Kim and
Jong Kim and Hanjun Kim",
title = "Integrated {IoT} programming with selective
abstraction",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "51--60",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081031",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The explosion of networked devices has driven a new
computing environment called the Internet of Things
(IoT), enabling various services such as home
automation and health monitoring. Despite the promising
applicability of the IoT, developing an IoT service is
challenging for programmers, because they must
integrate multiple programmable devices and
heterogeneous third-party devices. Recent works have
proposed integrated programming platforms, but they
either require device-specific implementation for
third-party devices without any device abstraction, or
abstract all the devices to the standard interfaces
requiring unnecessary abstraction of programmable
devices. To integrate IoT devices with selective
abstraction, this work revisits the object oriented
programming (OOP) model, and proposes a new language
extension and its compiler-runtime framework, called
Esperanto. With three annotations that map each object
to its corresponding IoT device, the Esperanto language
allows programmers to integrate multiple programmable
devices into one OOP program and to abstract similar
third-party devices into their common ancestor classes.
Given the annotations, the Esperanto compiler
automatically partitions the integrated program into
multiple sub-programs for each programmable IoT device,
and inserts communication and synchronization code.
Moreover, for the ancestor classes, the Esperanto
runtime dynamically identifies connected third-party
devices, and links their corresponding descendant
objects. Compared to an existing approach to integrated
IoT programming, Esperanto requires 33.3\%
fewer lines of code to implement 5 IoT services, and
reduces their response time by 44.8\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Zhang:2017:TSB,
author = "Min Zhang and Yunhui Ying",
title = "Towards {SMT-based} {LTL} model checking of clock
constraint specification language for real-time and
embedded systems",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "61--70",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081035",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Clock Constraint Specification Language (CCSL) is
a formal language companion to MARTE (shorthand for
Modeling and Analysis of Real-Time and Embedded
systems), a UML profile used to facilitate the design
and analysis of real-time and embedded systems. CCSL is
proposed to specify constraints on the occurrences of
events in systems. However, the language lacks
efficient verification support to formally analyze
temporal properties, which are important for real-time
and embedded systems. In this paper, we
propose an SMT-based approach to model checking of the
temporal properties specified in Linear Temporal Logic
(LTL) for CCSL by transforming CCSL constraints and LTL
formulas into SMT formulas. We implement a prototype
tool for the proposed approach and use the
state-of-the-art tool Z3 as its underlying SMT solver.
We model two practical real-time and embedded systems,
i.e., a traffic light controller and a power window
system in CCSL, and model check LTL properties of them
using the proposed approach. Experimental results
demonstrate the effectiveness and efficiency of our
approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Zheng:2017:ITS,
author = "Wenguang Zheng and Hui Wu and Chuanyao Nie",
title = "Integrating task scheduling and cache locking for
multicore real-time embedded systems",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "71--80",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081033",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern embedded processors provide hardware support
for cache locking, a mechanism used to facilitate the
WCET (Worst-Case Execution Time) calculation of a task.
We investigate the problem of integrating task
scheduling and cache locking for a set of preemptible
tasks with individual release times and deadlines on a
multi-core processor with two-level caches. We propose
a novel integrated approach that schedules the task set
and allocates the locked cache contents of each task to
the local caches (L1 caches) and the level-two cache
(L2 cache). Our approach consists of three major
components, the task scheduler, the L1 cache allocator,
and the L2 cache allocator. The task scheduler aims at
minimizing the number of task preemptions. The L1 cache
allocator converts the interference graph of all the
tasks scheduled on each core into a DAG by considering
the preemptions between tasks and allocates the L1
cache space to each task. The L2 cache allocator
converts the interference graph of all the tasks into a
DAG by using a k-longest-path-based graph orientation
algorithm and allocates the L2 cache space to each
task. Both cache allocators significantly improve the
cache utilization for all the caches due to the
efficient use of the interference graphs of tasks. We
have implemented our approach and compared it with the
extended version of the preemption tree-based approach
and the static analysis approach without cache locking
by using a set of benchmarks from the MRTC WCET
benchmark suite and SNU real-time benchmarks. Compared
to the extended version of the preemption tree-based
approach, the maximum WCRT (Worst Case Response Time)
improvement of our approach is 15\%. Compared to the
static analysis approach, the maximum WCRT improvement
of our approach is 37\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Wang:2017:TME,
author = "Yi Wang and Mingxu Zhang and Jing Yang",
title = "Towards memory-efficient processing-in-memory
architecture for convolutional neural networks",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "81--90",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081032",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Convolutional neural networks (CNNs) are widely
adopted in artificial intelligence systems. In contrast
to conventional computing-centric applications, the
computational and memory resources of CNN applications
are mixed together in the network weights. This incurs
a significant amount of data movement, especially for
high-dimensional convolutions. Although recent embedded
3D-stacked Processing-in-Memory (PIM) architecture
alleviates this memory bottleneck to provide fast
near-data processing, memory is still a limiting factor
of the entire system. An unsolved key challenge is how
to efficiently allocate convolutions to 3D-stacked PIM
to combine the advantages of both neural and
computational processing. This paper presents
Memolution, a compiler-based memory efficient data
allocation strategy for convolutional neural networks
on PIM architecture. Memolution offers thread-level
parallelism that can fully exploit the computational
power of PIM architecture. The objective is to capture
the characteristics of neural network applications and
present a hardware-independent design to transparently
allocate CNN applications onto the underlining hardware
resources provided by PIM. We demonstrate the viability
of the proposed technique using a variety of realistic
convolutional neural network applications. Our
extensive evaluations show that Memolution
significantly improves performance and cache
utilization compared to the baseline scheme.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Ding:2017:UNS,
author = "Xianzhong Ding and Zhiyong Zhang and Zhiping Jia and
Lei Ju and Mengying Zhao and Huawei Huang",
title = "Unified {nvTCAM} and {sTCAM} architecture for
improving packet matching performance",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "91--100",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081034",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software-Defined Networking (SDN) allows controlling
applications to install fine-grained forwarding
policies in the underlying switches. Ternary Content
Addressable Memory (TCAM) enables fast lookups in
hardware switches with flexible wildcard rule patterns.
However, the performance of packet processing is
severely constrained by the capacity of TCAM, which
aggravates the processing burden and latency issues. In
this paper, we propose a hybrid TCAM architecture which
consists of NVM-based TCAM (nvTCAM) and SRAM-based TCAM
(sTCAM), utilizing nvTCAM to cache the most popular
rules to improve the cache hit ratio, while relying on
a very small sTCAM to handle cache-miss traffic and
effectively decrease update latency. Considering the
special rule dependencies, we present an efficient Rule
Migration Replacement (RMR) policy that makes full use
of both nvTCAM and sTCAM to obtain better
performance. Experimental results show that the
proposed architecture outperforms current TCAM
architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Pan:2017:LPM,
author = "Chen Pan and Mimi Xie and Yongpan Liu and Yanzhi Wang
and Chun Jason Xue and Yuangang Wang and Yiran Chen and
Jingtong Hu",
title = "A lightweight progress maximization scheduler for
non-volatile processor under unstable energy
harvesting",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "101--110",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081038",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy harvesting techniques become increasingly
popular as power supplies for embedded systems.
However, the harvested energy is intrinsically
unstable. Thus, the program execution may be
interrupted frequently. Although the development of
non-volatile processors (NVP) can save and restore
execution states, both hardware and software challenges
exist for energy harvesting powered embedded systems.
On the hardware side, existing power detector only
signals the ``poor'' quality of the harvested power
based on a preset threshold voltage. The inappropriate
setting of this threshold will make the NVP based
embedded system suffer from either unnecessary
checkpointing or checkpointing failures. On the
software side, not all tasks can be checkpointed. Once
the power is off, these tasks will have to restart from
the beginning. In this paper, a task scheduler is
proposed to maximize task progress by prioritizing
tasks which cannot be checkpointed when power is weak
so that they can finish before the power outage. To
assist task scheduling, three additional modules
including voltage monitor, checkpointing handler, and
routine handler, are proposed. Experimental results
show increased overall task progress and reduced energy
consumption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Dietrich:2017:OVA,
author = "Christian Dietrich and Daniel Lohmann",
title = "{OSEK-V}: application-specific {RTOS} instantiation in
hardware",
journal = j-SIGPLAN,
volume = "52",
number = "4",
pages = "111--120",
month = may,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140582.3081030",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:15 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The employment of a real-time operating system (RTOS)
in an embedded control system is often an
all-or-nothing decision: while the RTOS abstractions
provide for easier software composition and
development, the price in terms of event latencies and
memory costs is high. Especially in HW/SW codesign
settings, system developers try to avoid the employment
of a full-blown RTOS as far as possible. In OSEK-V, we
mitigate this trade-off by very aggressively tailoring
the concrete RTOS instance into the hardware.
Instead of implementing generic OS components as custom
hardware devices, we capture the actually possible
application-kernel interactions as a finite-state
machine and integrate the tailored RTOS semantics
directly into the processor pipeline. In our
experimental results with an OSEK-based implementation
of a quadrotor flight controller into the Rocket/RISC-V
softcore, we can thereby significantly reduce event
latencies, interrupt lock times, and memory footprint
at moderate costs in terms of FPGA resources.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '17 conference proceedings.",
}
@Article{Pai:2016:CTO,
author = "Sreepathi Pai and Keshav Pingali",
title = "A compiler for throughput optimization of graph
algorithms on {GPUs}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "1--19",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984015",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Writing high-performance GPU implementations of graph
algorithms can be challenging. In this paper, we argue
that three optimizations called throughput
optimizations are key to high-performance for this
application class. These optimizations describe a large
implementation space making it unrealistic for
programmers to implement them by hand. To address this
problem, we have implemented these optimizations in a
compiler that produces CUDA code from an
intermediate-level program representation called IrGL.
Compared to state-of-the-art handwritten CUDA
implementations of eight graph applications, code
generated by the IrGL compiler is up to 5.95x
faster (median 1.4x) for five applications and never
more than 30\% slower for the others. Throughput
optimizations contribute an improvement up to 4.16x
(median 1.4x) to the performance of unoptimized IrGL
code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Surendran:2016:APP,
author = "Rishi Surendran and Vivek Sarkar",
title = "Automatic parallelization of pure method calls via
conditional future synthesis",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "20--38",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984035",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a novel approach for using futures to
automatically parallelize the execution of pure method
calls. Our approach is built on three new techniques to
address the challenge of automatic parallelization via
future synthesis: candidate future synthesis,
parallelism benefit analysis, and threshold expression
synthesis. During candidate future synthesis, our
system annotates pure method calls as async expressions
and synthesizes a parallel program with future objects
and their type declarations. Next, the system performs
a parallelism benefit analysis to determine which async
expressions may need to be executed sequentially due to
overhead reasons, based on execution profile
information collected from multiple test inputs.
Finally, threshold expression synthesis uses the output
from parallelism benefit analysis to synthesize
predicate expressions that can be used to determine at
runtime if a specific pure method call should be
executed sequentially or in parallel. We have
implemented our approach, and the results obtained from
an experimental evaluation of the complete system on a
range of sequential Java benchmarks are very
encouraging. Our evaluation shows that our approach can
provide significant parallel speedups of up to 7.4 $
\times $ (geometric mean of 3.69 $ \times $) relative
to the sequential programs when using 8 processor
cores, with zero programmer effort beyond providing the
sequential program and test cases for parallelism
benefit analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Sorensen:2016:PIW,
author = "Tyler Sorensen and Alastair F. Donaldson and Mark
Batty and Ganesh Gopalakrishnan and Zvonimir
Rakamari{\'c}",
title = "Portable inter-workgroup barrier synchronisation for
{GPUs}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "39--58",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984032",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite the growing popularity of GPGPU programming,
there is not yet a portable and formally-specified
barrier that one can use to synchronise across
workgroups. Moreover, the occupancy-bound execution
model of GPUs breaks assumptions inherent in
traditional software execution barriers, exposing them
to deadlock. We present an occupancy discovery protocol
that dynamically discovers a safe estimate of the
occupancy for a given GPU and kernel, allowing for a
starvation-free (and hence, deadlock-free)
inter-workgroup barrier by restricting the number of
workgroups according to this estimate. We implement
this idea by adapting an existing, previously
non-portable, GPU inter-workgroup barrier to use OpenCL
2.0 atomic operations, and prove that the barrier meets
its natural specification in terms of synchronisation.
We assess the portability of our approach over eight
GPUs spanning four vendors, comparing the performance
of our method against alternative methods. Our key
findings include: (1) the recall of our discovery
protocol is nearly 100\%; (2) runtime comparisons
vary substantially across GPUs and applications; and
(3) our method provides portable and safe
inter-workgroup synchronisation across the applications
we study.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Doeraene:2016:PIW,
author = "S{\'e}bastien Doeraene and Tobias Schlatter",
title = "Parallel incremental whole-program optimizations for
{Scala.js}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "59--73",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984013",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Whole-program optimizations are powerful tools that
can dramatically improve performance, size and other
aspects of programs. Because they depend on global
knowledge, they must typically be reapplied to the
whole program when small changes are made, which makes
them too slow for the development cycle. This is an
issue for some environments that require, or benefit a
lot from, whole-program optimizations, such as
compilation to JavaScript or to the Dalvik VM, because
their development cycle is slowed down either by the
lack of optimizations, or by the time spent on applying
them. We present a new approach to designing
incremental whole-program optimizers for
object-oriented and functional languages: when part of
a program changes, only the portions affected by the
changes are reoptimized. An incremental optimizer using
this approach for Scala.js, the Scala to JavaScript
compiler, demonstrates speedups from 10x to 100x
compared to its batch version. As a result, the
optimizer's running time becomes insignificant compared
to separate compilation, making it fit for use on every
compilation run during the development cycle. We also
show how to parallelize the incremental algorithm to
take advantage of multicore hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Stefanescu:2016:SBP,
author = "Andrei Stefanescu and Daejun Park and Shijiao Yuwen
and Yilong Li and Grigore Rosu",
title = "Semantics-based program verifiers for all languages",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "74--91",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984027",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a language-independent verification
framework that can be instantiated with an operational
semantics to automatically generate a program verifier.
The framework treats both the operational semantics and
the program correctness specifications as reachability
rules between matching logic patterns, and uses the
sound and relatively complete reachability logic proof
system to prove the specifications using the semantics.
We instantiate the framework with the semantics of one
academic language, KernelC, as well as with three
recent semantics of real-world languages, C, Java, and
JavaScript, developed independently of our verification
infrastructure. We evaluate our approach empirically
and show that the generated program verifiers can check
automatically the full functional correctness of
challenging heap-manipulating programs implementing
operations on list and tree data structures, like AVL
trees. This is the first approach that can turn the
operational semantics of real-world languages into
correct-by-construction automatic verifiers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Sergey:2016:HSS,
author = "Ilya Sergey and Aleksandar Nanevski and Anindya
Banerjee and Germ{\'a}n Andr{\'e}s Delbianco",
title = "{Hoare}-style specifications as correctness conditions
for non-linearizable concurrent objects",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "92--110",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983999",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Designing efficient concurrent objects often requires
abandoning the standard specification technique of
linearizability in favor of more relaxed correctness
conditions. However, the variety of alternatives makes
it difficult to choose which condition to employ, and
how to compose them when using objects specified by
different conditions. In this work, we propose a
uniform alternative in the form of Hoare logic, which
can explicitly capture--in the auxiliary state--the
interference of environment threads. We demonstrate the
expressiveness of our method by verifying a number of
concurrent objects and their clients, which have so far
been specified only by non-standard conditions of
concurrency-aware linearizability, quiescent consistency,
and quantitative quiescent consistency. We report on the
implementation of the ideas in an existing Coq-based
tool, providing the first mechanized proofs for all the
examples in the paper.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Nienhuis:2016:OSC,
author = "Kyndylan Nienhuis and Kayvan Memarian and Peter
Sewell",
title = "An operational semantics for {C\slash C++11}
concurrency",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "111--128",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983997",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The C/C++11 concurrency model balances two goals: it
is relaxed enough to be efficiently implementable and
(leaving aside the ``thin-air'' problem) it is strong
enough to give useful guarantees to programmers. It is
mathematically precise and has been used in
verification research and compiler testing. However,
the model is expressed in an axiomatic style, as
predicates on complete candidate executions. This
suffices for computing the set of allowed executions of
a small litmus test, but it does not directly support
the incremental construction of executions of larger
programs. It is also at odds with conventional
operational semantics, as used implicitly in the rest
of the C/C++ standards. Our main contribution is the
development of an operational model for C/C++11
concurrency. This covers all the features of the
previous formalised axiomatic model, and we have a
mechanised proof that the two are equivalent, in
Isabelle/HOL. We also integrate this semantics with an
operational semantics for sequential C (described
elsewhere); the combined semantics can incrementally
execute programs in a small fragment of C. Doing this
uncovered several new aspects of the C/C++11 model: we
show that one cannot build an equivalent operational
model that simply follows program order, sequentially
consistent order, or the synchronises-with order. The
first negative result is forced by hardware-observable
behaviour, but the latter two are not, and so might be
ameliorated by changing C/C++11. More generally, we
hope that this work, with its focus on incremental
construction of executions, will inform the future
design of new concurrency models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Dan:2016:MAR,
author = "Andrei Marian Dan and Patrick Lam and Torsten Hoefler
and Martin Vechev",
title = "Modeling and analysis of remote memory access
programming",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "129--144",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984033",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent advances in networking hardware have led to a
new generation of Remote Memory Access (RMA) networks
in which processors from different machines can
communicate directly, bypassing the operating system
and allowing higher performance. Researchers and
practitioners have proposed libraries and programming
models for RMA to enable the development of
applications running on these networks. However, the
memory models implied by these RMA libraries and
languages are often loosely specified, poorly
understood, and differ depending on the underlying
network architecture and other factors. Hence, it is
difficult to precisely reason about the semantics of
RMA programs or how changes in the network architecture
affect them. We address this problem with the following
contributions: (i) a coreRMA language which serves as a
common foundation, formalizing the essential
characteristics of RMA programming; (ii) complete
axiomatic semantics for that language; (iii)
integration of our semantics with an existing
constraint solver, enabling us to exhaustively generate
coreRMA programs (litmus tests) up to a specified bound
and check whether the tests satisfy their
specification; and (iv) extensive validation of our
semantics on real-world RMA systems. We generated and
ran 7441 litmus tests using each of the low-level RMA
network APIs: DMAPP, VPI Verbs, and Portals 4. Our
results confirmed that our model successfully captures
behaviors exhibited by these networks. Moreover, we
found RMA programs that behave inconsistently with
existing documentation, confirmed by network experts.
Our work provides an important step towards
understanding existing RMA networks, thus influencing
the design of future RMA interfaces and hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Itzhaky:2016:DDC,
author = "Shachar Itzhaky and Rohit Singh and Armando
Solar-Lezama and Kuat Yessenov and Yongquan Lu and
Charles Leiserson and Rezaul Chowdhury",
title = "Deriving divide-and-conquer dynamic programming
algorithms using solver-aided transformations",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "145--164",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983993",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a framework allowing domain experts to
manipulate computational terms in the interest of
deriving better, more efficient implementations. It
employs deductive reasoning to generate provably
correct efficient implementations from a very
high-level specification of an algorithm, and inductive
constraint-based synthesis to improve automation.
Semantic information is encoded into program terms
through the use of refinement types. In this paper, we
develop the technique in the context of a system called
Bellmania that uses solver-aided tactics to derive
parallel divide-and-conquer implementations of dynamic
programming algorithms that have better locality and
are significantly more efficient than traditional
loop-based implementations. Bellmania includes a
high-level language for specifying dynamic programming
algorithms and a calculus that facilitates gradual
transformation of these specifications into efficient
implementations. These transformations formalize the
divide-and-conquer technique; a visualization interface
helps users to interactively guide the process, while
an SMT-based back-end verifies each step and takes care
of low-level reasoning required for parallelism. We
have used the system to generate provably correct
implementations of several algorithms, including some
important algorithms from computational biology, and
show that the performance is comparable to that of the
best manually optimized code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
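
The loop-to-divide-and-conquer reshaping that this abstract describes can be previewed on a toy recurrence. The sketch below is illustrative only (it is not Bellmania's DSL or tactics): it computes edit distance both with the textbook loop nest and with a quadrant-recursive traversal that visits blocks in dependency order, the shape that buys locality and exposes parallelism.

    # The same DP recurrence evaluated two ways: a loop nest, and a
    # divide-and-conquer quadrant recursion respecting dependencies.

    def edit_distance_loops(a, b):
        n, m = len(a), len(b)
        D = [[0] * (m + 1) for _ in range(n + 1)]
        for i in range(n + 1):
            for j in range(m + 1):
                if i == 0 or j == 0:
                    D[i][j] = i + j
                else:
                    D[i][j] = min(D[i-1][j] + 1, D[i][j-1] + 1,
                                  D[i-1][j-1] + (a[i-1] != b[j-1]))
        return D

    def edit_distance_dnc(a, b):
        n, m = len(a), len(b)
        D = [[0] * (m + 1) for _ in range(n + 1)]

        def cell(i, j):
            if i == 0 or j == 0:
                D[i][j] = i + j
            else:
                D[i][j] = min(D[i-1][j] + 1, D[i][j-1] + 1,
                              D[i-1][j-1] + (a[i-1] != b[j-1]))

        def solve(i0, i1, j0, j1):          # fill block [i0,i1) x [j0,j1)
            if i1 - i0 == 0 or j1 - j0 == 0:
                return
            if i1 - i0 == 1 and j1 - j0 == 1:
                cell(i0, j0)
                return
            im, jm = (i0 + i1 + 1) // 2, (j0 + j1 + 1) // 2
            solve(i0, im, j0, jm)           # top-left first,
            solve(i0, im, jm, j1)           # then the two blocks that
            solve(im, i1, j0, jm)           # depend only on it (these
            solve(im, i1, jm, j1)           # two could run in parallel),
                                            # then bottom-right
        solve(0, n + 1, 0, m + 1)
        return D

    a, b = "kitten", "sitting"
    assert edit_distance_loops(a, b) == edit_distance_dnc(a, b)
    print(edit_distance_dnc(a, b)[len(a)][len(b)])   # -> 3
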
@Article{Srinivasan:2016:SMC,
author = "Venkatesh Srinivasan and Tushar Sharma and Thomas
Reps",
title = "Speeding up machine-code synthesis",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "165--180",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984006",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Machine-code synthesis is the problem of searching for
an instruction sequence that implements a semantic
specification, given as a formula in quantifier-free
bit-vector logic (QFBV). Instruction sets like Intel's
IA-32 have around 43,000 unique instruction schemas;
this huge instruction pool, along with the exponential
cost inherent in enumerative synthesis, results in an
enormous search space for a machine-code synthesizer:
even for relatively small specifications, the
synthesizer might take several hours or days to find an
implementation. In this paper, we present several
improvements to the algorithms used in a
state-of-the-art machine-code synthesizer McSynth. In
addition to a novel pruning heuristic, our improvements
incorporate a number of ideas known from the
literature, which we adapt in novel ways for the
purpose of speeding up machine-code synthesis. Our
experiments for Intel's IA-32 instruction set show that
our improvements enable synthesis of code for 12 out of
14 formulas on which McSynth times out, speeding up
synthesis by at least 1981X; for the remaining formulas,
they speed up synthesis by 3X.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
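
A toy rendition of enumerative synthesis with pruning, assuming a four-instruction register machine rather than IA-32: the pruning shown is the standard observational-equivalence trick over test inputs, not McSynth's QFBV-level heuristics, and a real synthesizer would then verify the candidate against the full specification (CEGIS-style) rather than trusting the tests.

    # Enumerative instruction-sequence synthesis over a toy ISA,
    # pruning candidate sequences that behave identically to an
    # already-explored one on the test inputs.

    from collections import deque
    from functools import reduce

    INSTRS = {
        "inc": lambda x: (x + 1) & 0xFF,   # 8-bit register semantics
        "dec": lambda x: (x - 1) & 0xFF,
        "shl": lambda x: (x << 1) & 0xFF,
        "neg": lambda x: (-x) & 0xFF,
    }

    def run(seq, x):
        return reduce(lambda v, op: INSTRS[op](v), seq, x)

    def synthesize(spec, tests, max_len=5):
        goal = tuple(spec(t) for t in tests)
        seen = {tuple(tests)}              # behavior of the empty program
        queue = deque([()])                # BFS over instruction sequences
        while queue:
            seq = queue.popleft()
            if len(seq) >= max_len:
                continue
            for op in INSTRS:
                cand = seq + (op,)
                behavior = tuple(run(cand, t) for t in tests)
                if behavior == goal:
                    return cand
                if behavior in seen:       # prune: equivalent prefix exists
                    continue
                seen.add(behavior)
                queue.append(cand)
        return None

    # Synthesize code for spec(x) = 2*x + 1 on 8-bit values.
    tests = [0, 1, 5, 77, 200]
    seq = synthesize(lambda x: (2 * x + 1) & 0xFF, tests)
    print(seq)                             # -> ('shl', 'inc')
    assert all(run(seq, t) == (2 * t + 1) & 0xFF for t in tests)
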
@Article{Panchekha:2016:ARW,
author = "Pavel Panchekha and Emina Torlak",
title = "Automated reasoning for web page layout",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "181--194",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984010",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Web pages define their appearance using Cascading
Style Sheets, a modular language for layout of
tree-structured documents. In principle, using CSS is
easy: the developer specifies declarative constraints
on the layout of an HTML document (such as the
positioning of nodes in the HTML tree), and the browser
solves the constraints to produce a box-based rendering
of that document. In practice, however, the subtleties
of CSS semantics make it difficult to develop
stylesheets that produce the intended layout across
different user preferences and browser settings. This
paper presents the first mechanized formalization of a
substantial fragment of the CSS semantics. This
formalization is equipped with an efficient reduction
to the theory of quantifier-free linear real
arithmetic, enabling effective automated reasoning
about CSS stylesheets and their behavior. We implement
this reduction in Cassius, a solver-aided framework for
building semantics-aware tools for CSS. To demonstrate
the utility of Cassius, we prototype new tools for
automated verification, debugging, and synthesis of CSS
code. We show that these tools work on fragments of
real-world websites, and that Cassius is a practical
first step toward solver-aided programming for the
web.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
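
The reduction to quantifier-free linear real arithmetic can be previewed at a much smaller scale. The sketch below encodes one hypothetical row of boxes; it is not Cassius's CSS encoding, and it assumes the z3-solver Python package is installed.

    # A tiny layout fragment as linear real arithmetic, solved with Z3.

    from z3 import Real, Solver, sat

    parent_width = Real("parent_width")
    xs = [Real("x%d" % i) for i in range(3)]     # left edges of three boxes
    ws = [Real("w%d" % i) for i in range(3)]     # and their widths

    s = Solver()
    s.add(parent_width == 300)
    s.add(xs[0] == 0)                            # first box flush left
    for i in range(2):
        s.add(xs[i + 1] == xs[i] + ws[i])        # boxes laid out in a row
    for w in ws:
        s.add(w >= 50)                           # a min-width "style rule"
    s.add(ws[1] == 2 * ws[0])                    # box 1 is twice box 0
    s.add(xs[2] + ws[2] == parent_width)         # the row fills its parent

    assert s.check() == sat                      # a concrete layout exists
    print(s.model())

    # Verification-style query: box 0 can never exceed width 200 under
    # these rules (3*w0 + w2 == 300 with w2 >= 50), so this is unsat.
    s.push()
    s.add(ws[0] > 200)
    print(s.check())                             # -> unsat
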
@Article{Wang:2016:FFS,
author = "Xinyu Wang and Sumit Gulwani and Rishabh Singh",
title = "{FIDEX}: filtering spreadsheet data using examples",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "195--213",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984030",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data filtering in spreadsheets is a common problem
faced by millions of end-users. The task of data
filtering requires a computational model that can
separate intended positive and negative string
instances. We present a system, FIDEX, that can
efficiently learn desired data filtering expressions
from a small set of positive and negative string
examples. There are two key ideas of our approach.
First, we design an expressive DSL to represent
disjunctive filter expressions needed for several
real-world data filtering tasks. Second, we develop an
efficient synthesis algorithm for incrementally
learning consistent filter expressions in the DSL from
very few positive and negative examples. A DAG-based
data structure is used to succinctly represent a large
number of filter expressions, and two corresponding
operators are defined for algorithmically handling
positive and negative examples, namely, the
intersection and subtraction operators. FIDEX is able
to learn data filters for 452 out of 460 real-world
data filtering tasks in real time (0.22s), using only
2.2 positive string instances and 2.7 negative string
instances on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
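
A minimal sketch of example-driven filter learning in the same spirit: atomic predicates are drawn from the examples, atoms accepting any negative are discarded, and a disjunction is assembled greedily. The atoms and the search here are invented for illustration; FIDEX's DSL and its DAG-based representation with intersection and subtraction operators are far richer.

    # Learn a disjunctive string filter from positive/negative examples.

    def atoms(strings):
        """Candidate atomic predicates derived from the examples."""
        cands = {}
        for s in strings:
            if s:
                cands["startswith(%r)" % s[0]] = (lambda c: lambda t: t.startswith(c))(s[0])
                cands["endswith(%r)" % s[-1]] = (lambda c: lambda t: t.endswith(c))(s[-1])
        cands["has_digit"] = lambda t: any(ch.isdigit() for ch in t)
        cands["all_upper"] = lambda t: t.isupper()
        return cands

    def learn_filter(pos, neg):
        # Keep only atoms that reject every negative example.
        cands = {name: p for name, p in atoms(pos).items()
                 if not any(p(n) for n in neg)}
        chosen, uncovered = [], set(pos)
        while uncovered:
            # Greedy set cover: the atom covering most uncovered positives.
            name, p = max(cands.items(),
                          key=lambda kv: sum(s in uncovered and kv[1](s) for s in pos))
            covered = {s for s in uncovered if p(s)}
            if not covered:
                raise ValueError("no consistent disjunction over these atoms")
            chosen.append((name, p))
            uncovered -= covered
        return chosen

    pos = ["INV-2019", "INV-2020", "PAID"]
    neg = ["draft", "void-2020"]
    flt = learn_filter(pos, neg)
    print(" or ".join(name for name, _ in flt))
    accept = lambda t: any(p(t) for _, p in flt)
    assert all(accept(s) for s in pos) and not any(accept(s) for s in neg)
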
@Article{Moore:2016:EAC,
author = "Scott Moore and Christos Dimoulas and Robert Bruce
Findler and Matthew Flatt and Stephen Chong",
title = "Extensible access control with authorization
contracts",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "214--233",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984021",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Existing programming language access control
frameworks do not meet the needs of all software
components. We propose an expressive framework for
implementing access control monitors for components.
The basis of the framework is a novel concept: the
authority environment. An authority environment
associates rights with an execution context. The
building blocks of access control monitors in our
framework are authorization contracts: software
contracts that manage authority environments. We
demonstrate the expressiveness of our framework by
implementing a diverse set of existing access control
mechanisms and writing custom access control monitors
for three realistic case studies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
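
The authority-environment idea can be sketched in a few lines: rights attached to a dynamic extent, consulted by monitored operations. This is only a conceptual analogue under invented names; the paper's authorization contracts are software contracts with much richer composition.

    # Rights associated with the current execution context, granted
    # for a dynamic extent and checked by monitored operations.

    _authority = [frozenset()]            # stack of authority environments

    class granting:
        def __init__(self, *rights):
            self.rights = frozenset(rights)
        def __enter__(self):
            _authority.append(_authority[-1] | self.rights)
        def __exit__(self, *exc):
            _authority.pop()              # rights end with the extent

    def require(right):
        if right not in _authority[-1]:
            raise PermissionError("missing right: " + right)

    def delete_file(path):
        require("fs:delete")              # a monitored operation
        print("deleting", path)

    with granting("fs:delete"):
        delete_file("/tmp/scratch")       # permitted inside the extent
    try:
        delete_file("/tmp/scratch")       # authority has been restored
    except PermissionError as e:
        print(e)
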
@Article{Osvald:2016:GGT,
author = "Leo Osvald and Gr{\'e}gory Essertel and Xilun Wu and
Lilliam I. Gonz{\'a}lez Alay{\'o}n and Tiark Rompf",
title = "Gentrification gone too far? {Affordable} 2nd-class
values for fun and (co-)effect",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "234--251",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984009",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "First-class functions dramatically increase
expressiveness, at the expense of static guarantees. In
ALGOL or PASCAL, functions could be passed as arguments
but never escape their defining scope. Therefore,
function arguments could serve as temporary access
tokens or capabilities, enabling callees to perform
some action, but only for the duration of the call. In
modern languages, such programming patterns are no
longer available. The central thrust of this paper is
to re-introduce second-class functions and other values
alongside first-class entities in modern languages. We
formalize second-class values with stack-bounded
lifetimes as an extension to simply-typed $ \lambda $
calculus, and for richer type systems such as F$_{ <
\colon }$ and systems with path-dependent types. We
generalize the binary first- vs second-class
distinction to arbitrary privilege lattices, with the
underlying type lattice as a special case. In this
setting, abstract types naturally enable privilege
parametricity. We prove type soundness and lifetime
properties in Coq. We implement our system as an
extension of Scala, and present several case studies.
First, we modify the Scala Collections library and add
privilege annotations to all higher-order functions.
Privilege parametricity is key to retain the high
degree of code-reuse between sequential and parallel as
well as lazy and eager collections. Second, we use
scoped capabilities to introduce a model of checked
exceptions in the Scala library, with only a few changes
to the code. Third, we employ second-class capabilities
for memory safety in a region-based off-heap memory
library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{DiLorenzo:2016:IFD,
author = "Jonathan DiLorenzo and Richard Zhang and Erin Menzies
and Kathleen Fisher and Nate Foster",
title = "Incremental forest: a {DSL} for efficiently managing
filestores",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "252--271",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984034",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "File systems are often used to store persistent
application data, but manipulating file systems using
standard APIs can be difficult for programmers. Forest
is a domain-specific language that bridges the gap
between the on-disk and in-memory representations of
file system data. Given a high-level specification of
the structure, contents, and properties of a collection
of directories, files, and symbolic links, the Forest
compiler generates tools for loading, storing, and
validating that data. Unfortunately, the initial
implementation of Forest offered few mechanisms for
controlling cost --- e.g., the run-time system could
load gigabytes of data, even if only a few bytes were
needed. This paper introduces Incremental Forest
(iForest), an extension to Forest with an explicit
delay construct that programmers can use to precisely
control costs. We describe the design of iForest using
a series of running examples, present a formal
semantics in a core calculus, and define a simple cost
model that accurately characterizes the resources
needed to use a given specification. We propose skins,
which allow programmers to modify the delay structure
of a specification in a compositional way, and develop
a static type system for ensuring compatibility between
specifications and skins. We prove the soundness and
completeness of the type system and a variety of
algebraic properties of skins. We describe an OCaml
implementation and evaluate its performance on
applications developed in collaboration with watershed
hydrologists.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
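
A minimal sketch of the explicit-delay idea, assuming memoized thunks stand in for iForest's delay construct and a byte counter stands in for its cost model: the directory skeleton loads eagerly, file contents only on demand.

    import os

    class Delayed:
        """A memoized thunk: pays its loading cost only when forced."""
        def __init__(self, load):
            self._load, self._value, self._forced = load, None, False
        def force(self):
            if not self._forced:
                self._value, self._forced = self._load(), True
            return self._value

    bytes_read = 0                     # stand-in for a cost model

    def load_file(path):
        def load():
            global bytes_read
            with open(path, "rb") as f:
                data = f.read()
            bytes_read += len(data)
            return data
        return Delayed(load)

    def load_dir(path):
        # The "specification": names eager, file contents delayed.
        return {name: load_file(os.path.join(path, name))
                for name in sorted(os.listdir(path))
                if os.path.isfile(os.path.join(path, name))}

    store = load_dir(".")              # assumes the directory holds files
    print(bytes_read)                  # -> 0: nothing has been read yet
    first = sorted(store)[0]
    store[first].force()               # pay only for the file we touch
    print(first, bytes_read)
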
@Article{Haller:2016:LLA,
author = "Philipp Haller and Alex Loiko",
title = "{LaCasa}: lightweight affinity and object capabilities
in {Scala}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "272--291",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984042",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Aliasing is a known source of challenges in the
context of imperative object-oriented languages, which
have led to important advances in type systems for
aliasing control. However, their large-scale adoption
has turned out to be a surprisingly difficult
challenge. While new language designs show promise,
they do not address the need of aliasing control in
existing languages. This paper presents a new approach
to isolation and uniqueness in an existing, widely-used
language, Scala. The approach is unique in the way it
addresses some of the most important obstacles to the
adoption of type system extensions for aliasing
control. First, adaptation of existing code requires
only a minimal set of annotations. Only a single bit of
information is required per class. Surprisingly, the
paper shows that this information can be provided by
the object-capability discipline, widely-used in
program security. We formalize our approach as a type
system and prove key soundness theorems. The type
system is implemented for the full Scala language,
providing, for the first time, a sound integration with
Scala's local type inference. Finally, we empirically
evaluate the conformity of existing Scala open-source
code on a corpus of over 75,000 LOC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{PerezDeRosso:2016:PCM,
author = "Santiago {Perez De Rosso} and Daniel Jackson",
title = "Purposes, concepts, misfits, and a redesign of git",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "292--310",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984018",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Git is a widely used version control system that is
powerful but complicated. Its complexity may not be an
inevitable consequence of its power but rather evidence
of flaws in its design. To explore this hypothesis, we
analyzed the design of Git using a theory that
identifies concepts, purposes, and misfits. Some
well-known difficulties with Git are described, and
explained as misfits in which underlying concepts fail
to meet their intended purpose. Based on this analysis,
we designed a reworking of Git (called Gitless) that
attempts to remedy these flaws. To correlate misfits
with issues reported by users, we conducted a study of
Stack Overflow questions. And to determine whether
users experienced fewer complications using Gitless in
place of Git, we conducted a small user study. Results
suggest our approach can be profitable in identifying,
analyzing, and fixing design problems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Kim:2016:AAP,
author = "Dohyeong Kim and Yonghwi Kwon and Peng Liu and I. Luk
Kim and David Mitchel Perry and Xiangyu Zhang and
Gustavo Rodriguez-Rivera",
title = "{Apex}: automatic programming assignment error
explanation",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "311--327",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984031",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents Apex, a system that can
automatically generate explanations for programming
assignment bugs, regarding where the bugs are and how
the root causes led to the runtime failures. It works
by comparing the passing execution of a correct
implementation (provided by the instructor) and the
failing execution of the buggy implementation
(submitted by the student). The technique overcomes a
number of technical challenges caused by syntactic and
semantic differences of the two implementations. It
collects the symbolic traces of the executions and
matches assignment statements in the two execution
traces by reasoning about symbolic equivalence. It then
matches predicates by aligning the control dependences
of the matched assignment statements, avoiding direct
matching of path conditions which are usually quite
different. Our evaluation shows that Apex is very
effective for 205 buggy real-world student submissions
of 4 programming assignments, and a set of 15
programming-assignment-style buggy programs collected
from stackoverflow.com, precisely pinpointing the root
causes and capturing the causality for 94.5\% of them.
The evaluation on a standard benchmark set with over
700 student bugs shows similar results. A user study in
the classroom shows that Apex has substantially
improved student productivity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Hanappi:2016:ARC,
author = "Oliver Hanappi and Waldemar Hummer and Schahram
Dustdar",
title = "Asserting reliable convergence for configuration
management scripts",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "328--343",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984000",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The rise of elastically scaling applications that
frequently deploy new machines has led to the adoption
of DevOps practices across the cloud engineering stack.
So-called configuration management tools utilize
scripts that are based on declarative resource
descriptions and make the system converge to the
desired state. It is crucial for convergent
configurations to be able to gracefully handle
transient faults, e.g., network outages when
downloading and installing software packages. In this
paper we introduce a conceptual framework for asserting
reliable convergence in configuration management. Based
on a formal definition of configuration scripts and
their resources, we utilize state transition graphs to
test whether a script makes the system converge to the
desired state under different conditions. In our
generalized model, configuration actions are partially
ordered, often resulting in prohibitively many possible
execution orders. To reduce this problem space, we
define and analyze a property called preservation, and
we show that if preservation holds for all pairs of
resources, then convergence holds for the entire
configuration. Our implementation builds on Puppet, but
the approach is equally applicable to other frameworks
like Chef, Ansible, etc. We perform a comprehensive
evaluation based on real world Puppet scripts and show
the effectiveness of the approach. Our tool is able to
detect all idempotence and convergence related issues
in a set of existing Puppet scripts with known issues
as well as some hitherto undiscovered bugs in a large
random sample of scripts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
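
The convergence property being tested can be sketched by brute force: model each resource as a guarded state transition and check every execution order. (The paper's preservation property is precisely what makes this tractable without enumerating all orders; the toy below just enumerates.)

    # Convergence check for declarative resources, brute-forcing orders.

    from itertools import permutations

    # state: a set of facts. Each resource: (name, requires, provides).
    RESOURCES = [
        ("install_pkg", set(),           {"pkg"}),
        ("write_conf",  set(),           {"conf"}),
        ("start_svc",   {"pkg", "conf"}, {"svc"}),
    ]
    DESIRED = {"pkg", "conf", "svc"}

    def apply(state, res):
        name, requires, provides = res
        if requires <= state:             # guard: dependencies present
            return state | provides
        return state                      # otherwise a no-op this run

    def converges(resources, runs):
        for order in permutations(resources):
            state = set()
            for _ in range(runs):         # rerun the script, as CM tools do
                for res in order:
                    state = apply(state, res)
            if state != DESIRED:
                return False, order
        return True, None

    ok, bad = converges(RESOURCES, runs=1)
    print("single run:", ok, bad and [r[0] for r in bad])   # some orders fail
    ok, _ = converges(RESOURCES, runs=2)
    print("two runs converge in every order:", ok)          # -> True
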
@Article{Treichler:2016:DP,
author = "Sean Treichler and Michael Bauer and Rahul Sharma and
Elliott Slaughter and Alex Aiken",
title = "Dependent partitioning",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "344--358",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984016",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A key problem in parallel programming is how data is
partitioned: divided into subsets that can be operated
on in parallel and, in distributed memory machines,
spread across multiple address spaces. We present a
dependent partitioning framework that allows an
application to concisely describe relationships between
partitions. Applications first establish independent
partitions, which may contain arbitrary subsets of
application data, permitting the expression of
arbitrary application-specific data distributions.
Dependent partitions are then derived from these using
the dependent partitioning operations provided by the
framework. By directly capturing inter-partition
relationships, our framework can soundly and precisely
reason about programs to perform important program
analyses crucial to ensuring correctness and achieving
good performance. As an example of the reasoning made
possible, we present a static analysis that discharges
most consistency checks on partitioned data during
compilation. We describe an implementation of our
framework within Regent, a language designed for the
Legion programming model. The use of dependent
partitioning constructs results in an 86-96\% decrease
in the lines of code required to describe the
partitioning, eliminates many of the expensive dynamic
checks required for soundness by the current Regent
partitioning implementation, and speeds up the
computation of partitions by 2.6-12.7X even on a single
thread. Additionally, we show that a distributed
implementation incorporated into the Legion runtime
system allows partitioning of data sets that are too
large to fit on a single node and yields a further 29X
speedup of partitioning operations on 64 nodes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
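
A small sketch of the dependent-partitioning idea: one partition is chosen independently, the other is derived from it through a field relation, so consistency checks can be discharged once rather than at every use. All names here are invented; Regent's operators (image, preimage, and friends) are richer.

    # A toy mesh: each edge's field points at two node ids.
    nodes = list(range(6))
    edges = {"e0": (0, 1), "e1": (1, 2), "e2": (3, 4), "e3": (4, 5)}

    # Independent partition: the application picks any node subsets.
    node_part = {"left": {0, 1, 2}, "right": {3, 4, 5}}

    def preimage(edge_field, node_subset):
        """Edges whose endpoints all fall in the given node subset."""
        return {e for e, (a, b) in edge_field.items()
                if a in node_subset and b in node_subset}

    # Dependent partition of edges, derived rather than hand-maintained.
    edge_part = {name: preimage(edges, ns) for name, ns in node_part.items()}
    print(edge_part)   # {'left': {'e0', 'e1'}, 'right': {'e2', 'e3'}}

    # Because edge_part is derived, checks a runtime would otherwise
    # perform dynamically can be discharged once, here:
    assert set.union(*edge_part.values()) <= set(edges)
    for a in edge_part:
        for b in edge_part:
            if a != b:
                assert not (edge_part[a] & edge_part[b])   # disjointness
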
@Article{Kulkarni:2016:APA,
author = "Sulekha Kulkarni and Ravi Mangal and Xin Zhang and
Mayur Naik",
title = "Accelerating program analyses by cross-program
training",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "359--377",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984023",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Practical programs share large modules of code.
However, many program analyses are ineffective at
reusing analysis results for shared code across
programs. We present POLYMER, an analysis optimizer to
address this problem. POLYMER runs the analysis offline
on a corpus of training programs and learns analysis
facts over shared code. It prunes the learnt facts to
eliminate intermediate computations and then reuses
these pruned facts to accelerate the analysis of other
programs that share code with the training corpus. We
have implemented POLYMER to accelerate analyses
specified in Datalog, and apply it to optimize two
analyses for Java programs: a call-graph analysis that
is flow- and context-insensitive, and a points-to
analysis that is flow- and context-sensitive. We
evaluate the resulting analyses on ten programs from
the DaCapo suite that share the JDK library. POLYMER
achieves average speedups of 2.6$ \times $ for the
call-graph analysis and 5.2$ \times $ for the points-to
analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Srinivasan:2016:IAS,
author = "Venkatesh Srinivasan and Thomas Reps",
title = "An improved algorithm for slicing machine code",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "378--393",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984003",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Machine-code slicing is an important primitive for
building binary analysis and rewriting tools, such as
taint trackers, fault localizers, and partial
evaluators. However, it is not easy to create a
machine-code slicer that exhibits a high level of
precision. Moreover, the problem of creating such a
tool is compounded by the fact that a small amount of
local imprecision can be amplified via cascade effects.
Most instructions in instruction sets such as Intel's
IA-32 and ARM are multi-assignments: they have several
inputs and several outputs (registers, flags, and
memory locations). This aspect of the instruction set
introduces a granularity issue during slicing: there
are often instructions at which we would like the slice
to include only a subset of the instruction's
semantics, whereas the slice is forced to include the
entire instruction. Consequently, the slice computed by
state-of-the-art tools is very imprecise, often
including essentially the entire program. This paper
presents an algorithm to slice machine code more
accurately. To counter the granularity issue, our
algorithm performs slicing at the microcode level,
instead of the instruction level, and obtains a more
precise microcode slice. To reconstitute a machine-code
program from a microcode slice, our algorithm uses
machine-code synthesis. Our experiments on IA-32
binaries of FreeBSD utilities show that, in comparison
to slices computed by a state-of-the-art tool, our
algorithm reduces the size of backward slices by 33\%,
and forward slices by 70\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
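
The granularity issue and the micro-level fix can be seen on a two-instruction example: a push both stores a value and adjusts SP, and a slice that needs only SP should not drag in the stored value's producer. The micro-expansion below is hypothetical, and where the paper reconstitutes machine code from the microcode slice via synthesis, this sketch merely reports which micro-ops survive.

    # Backward slicing over micro-assignments rather than instructions.

    def microcode(program):
        micro = []                        # (instr_index, defs, uses)
        for idx, (op, dst, srcs) in enumerate(program):
            if op == "push":
                micro.append((idx, {"mem"}, set(srcs) | {"SP"}))  # store data
                micro.append((idx, {"SP"}, {"SP"}))               # SP -= 4
            else:
                micro.append((idx, {dst}, set(srcs)))
        return micro

    def backward_slice(program, criterion):
        needed, keep = set(criterion), []
        for idx, defs, uses in reversed(microcode(program)):
            if defs & needed:             # this micro-op defines a need
                keep.append((idx, defs))
                needed = (needed - defs) | uses
        return list(reversed(keep))

    prog = [
        ("mov", "a", ["x"]),     # 0: produces the pushed value
        ("push", None, ["a"]),   # 1: mem := a  and  SP := SP - 4
    ]
    print(backward_slice(prog, {"SP"}))
    # -> [(1, {'SP'})]: only push's SP micro-op is in the slice, so
    # instruction 0 is excluded; an instruction-granularity slicer that
    # treats push as atomically using {a, SP} is forced to include it.
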
@Article{Petrashko:2016:CGL,
author = "Dmitry Petrashko and Vlad Ureche and Ondrej Lhot{\'a}k
and Martin Odersky",
title = "Call graphs for languages with parametric
polymorphism",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "394--409",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983991",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The performance of contemporary object oriented
languages depends on optimizations such as
devirtualization, inlining, and specialization, and
these in turn depend on precise call graph analysis.
Existing call graph analyses do not take advantage of
the information provided by the rich type systems of
contemporary languages, in particular generic type
arguments. Many existing approaches analyze Java
bytecode, in which generic types have been erased. This
paper shows that this discarded information is actually
very useful as the context in a context-sensitive
analysis, where it significantly improves precision and
keeps the running time small. Specifically, we propose
and evaluate call graph construction algorithms in
which the contexts of a method are (i) the type
arguments passed to its type parameters, and (ii) the
static types of the arguments passed to its term
parameters. The use of static types from the caller as
context is effective because it allows more precise
dispatch of call sites inside the callee. Our
evaluation indicates that the average number of
contexts required per method is small. We implement the
analysis in the Dotty compiler for Scala, and evaluate
it on programs that use the type-parametric Scala
collections library and on the Dotty compiler itself.
The context-sensitive analysis runs 1.4x faster than a
context-insensitive one and discovers 20\% more
monomorphic call sites at the same time. When applied
to method specialization, the imprecision in a
context-insensitive call graph would require the
average method to be cloned 22 times, whereas the
context-sensitive call graph indicates a much more
practical 1.00 to 1.50 clones per method. We applied
the proposed analysis to automatically specialize
generic methods. The resulting automatic transformation
achieves the same performance as state-of-the-art
techniques requiring manual annotations, while reducing
the size of the generated bytecode by up to 5 $ \times
$.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Chandra:2016:TIS,
author = "Satish Chandra and Colin S. Gordon and Jean-Baptiste
Jeannin and Cole Schlesinger and Manu Sridharan and
Frank Tip and Youngil Choi",
title = "Type inference for static compilation of
{JavaScript}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "410--429",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984017",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a type system and inference algorithm for a
rich subset of JavaScript equipped with objects,
structural subtyping, prototype inheritance, and
first-class methods. The type system supports abstract
and recursive objects, and is expressive enough to
accommodate several standard benchmarks with only minor
workarounds. The invariants enforced by the types
enable an ahead-of-time compiler to carry out
optimizations typically beyond the reach of static
compilers for dynamic languages. Unlike previous
inference techniques for prototype inheritance, our
algorithm uses a combination of lower and upper bound
propagation to infer types and discover type errors in
all code, including uninvoked functions. The inference
is expressed in a simple constraint language, designed
to leverage off-the-shelf fixed point solvers. We prove
soundness for both the type system and inference
algorithm. An experimental evaluation showed that the
inference is powerful, handling the aforementioned
benchmarks with no manual type annotation, and that the
inferred types enable effective static compilation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
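
A toy of combined lower- and upper-bound propagation, assuming a three-point subtype lattice: lower bounds flow from assignments, upper bounds from uses, and a variable whose lower bound escapes its upper bound is an error even in uninvoked code. This is not the paper's constraint language, only the flavor of the check.

    # Lower/upper bound inference over a tiny subtype lattice.

    SUBTYPE = {("int", "num"), ("int", "int"), ("num", "num"),
               ("str", "str")}            # reflexive-transitive pairs

    def is_subtype(a, b):
        return (a, b) in SUBTYPE

    def infer(constraints):
        lower, upper = {}, {}
        def lub(a, b):
            if a is None or is_subtype(a, b): return b
            if is_subtype(b, a): return a
            return "top"                  # incomparable lower bounds
        def glb(a, b):
            if a is None or is_subtype(b, a): return b
            if is_subtype(a, b): return a
            return "bottom"
        for kind, var, ty in constraints:
            if kind == "lower":           # e.g. assignment  v = 3
                lower[var] = lub(lower.get(var), ty)
            else:                         # e.g. use  v * 2  needs num
                upper[var] = glb(upper.get(var), ty)
        errors = [v for v in lower
                  if v in upper and not is_subtype(lower[v], upper[v])]
        return lower, upper, errors

    # function f(v) { v = 3; v = "hi"; return v * 2 }   -- never called
    cs = [("lower", "v", "int"), ("lower", "v", "str"), ("upper", "v", "num")]
    print(infer(cs))   # v's lower bound hits 'top': an error, uninvoked
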
@Article{Samak:2016:DSF,
author = "Malavika Samak and Omer Tripp and Murali Krishna
Ramanathan",
title = "Directed synthesis of failing concurrent executions",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "430--446",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984040",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Detecting concurrency-induced bugs in multithreaded
libraries can be challenging due to the intricacies
associated with their manifestation. This includes
invocation of multiple methods, synthesis of inputs to
the methods to reach the failing location, and crafting
of thread interleavings that cause the erroneous
behavior. Neither fuzzing-based testing techniques nor
over-approximate static analyses are well positioned to
detect such subtle defects while retaining high
accuracy alongside satisfactory coverage. In this
paper, we propose a directed, iterative and scalable
testing engine that combines the strengths of static
and dynamic analysis to help synthesize concurrent
executions to expose complex concurrency-induced bugs.
Our engine accepts as input the library, its client
(either sequential or concurrent) and a specification
of correctness. Then, it iteratively refines the client
to generate an execution that can break the input
specification. Each step of the iterative process
includes statically identifying sub-goals towards the
goal of failing the specification, generating a plan
toward meeting these goals, and merging of the paths
traversed dynamically with the plan computed statically
via constraint solving to generate a new client. The
engine reports full reproduction scenarios, guaranteed
to be true, for the bugs it finds. We have created a
prototype of our approach named MINION. We validated
MINION by applying it to well-tested concurrent classes
from popular Java libraries, including the latest
versions of OpenJDK and Google-Guava. We were able to
detect 31 real crashes across 10 classes in a total of
23 minutes, including previously unknown bugs.
Comparison with three other tools reveals that
combined, they report only 9 of the 31 crashes (and no
other crashes beyond MINION). This is because several
of these bugs manifest under deeply nested path
conditions (observed maximum of 11), deep nesting of
method invocations (observed maximum of 6) and multiple
refinement iterations to generate the crash-inducing
client.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Huang:2016:MCR,
author = "Shiyou Huang and Jeff Huang",
title = "Maximal causality reduction for {TSO} and {PSO}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "447--461",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984025",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Verifying concurrent programs is challenging due to
the exponentially large thread interleaving space. The
problem is exacerbated by relaxed memory models such as
Total Store Order (TSO) and Partial Store Order (PSO)
which further explode the interleaving space by
reordering instructions. A recent advance, Maximal
Causality Reduction (MCR), has shown great promise to
improve verification effectiveness by maximally
reducing redundant explorations. However, the original
MCR only works for the Sequential Consistency (SC)
memory model, but not for TSO and PSO. In this paper,
we develop novel extensions to MCR by solving two key
problems under TSO and PSO: (1) generating
interleavings that can reach new states by encoding the
operational semantics of TSO and PSO with first-order
logical constraints and solving them with SMT solvers,
and (2) enforcing TSO and PSO interleavings by
developing novel replay algorithms that allow
executions out of the program order. We show that our
approach successfully enables MCR to effectively
explore TSO and PSO interleavings. We have compared our
approach with a recent Dynamic Partial Order Reduction
(DPOR) algorithm for TSO and PSO and a SAT-based
stateless model checking approach. Our results show
that our approach is much more effective than the other
approaches for both state-space exploration and bug
finding --- on average it explores 5-10X fewer
executions and finds many bugs that the other tools
cannot find.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
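
The TSO behavior at issue can be reproduced with a brute-force operational model, the baseline that MCR's SMT-based reduction is designed to beat: each thread writes through a private store buffer that drains nondeterministically, and exhaustive scheduling of the store-buffering litmus test reveals the relaxed outcome.

    # Operational TSO: per-thread store buffers drained nondeterministically.

    from itertools import product

    def read(mem, buf, x):
        for (y, v) in reversed(buf):      # forward from own buffer first
            if y == x:
                return v
        return mem[x]

    def run(schedule):
        mem = {"x": 0, "y": 0}
        bufs = {0: [], 1: []}
        regs = {}
        prog = {0: [("w", "x", 1), ("r", "y", "r0")],
                1: [("w", "y", 1), ("r", "x", "r1")]}
        pc = {0: 0, 1: 0}
        for kind, t in schedule:          # steps: ("exec", t) / ("flush", t)
            if kind == "flush" and bufs[t]:
                x, v = bufs[t].pop(0)     # drain the oldest buffered store
                mem[x] = v
            elif kind == "exec" and pc[t] < len(prog[t]):
                op = prog[t][pc[t]]
                pc[t] += 1
                if op[0] == "w":
                    bufs[t].append((op[1], op[2]))
                else:
                    regs[op[2]] = read(mem, bufs[t], op[1])
        for t in (0, 1):                  # drain remaining buffers
            for x, v in bufs[t]:
                mem[x] = v
        return regs.get("r0"), regs.get("r1")

    steps = [("exec", 0), ("exec", 1), ("flush", 0), ("flush", 1)]
    outcomes = {run(s) for s in product(steps, repeat=8)}
    complete = {o for o in outcomes if None not in o}
    print(sorted(complete))
    # -> [(0,0), (0,1), (1,0), (1,1)]: (0, 0) is the TSO-only outcome
    # that sequential consistency forbids.
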
@Article{Huang:2016:PMR,
author = "Jeff Huang and Arun K. Rajagopalan",
title = "Precise and maximal race detection from incomplete
traces",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "462--476",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984024",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present RDIT, a novel dynamic technique to detect
data races in multithreaded programs with incomplete
trace information, i.e., in the presence of missing
events. RDIT is both precise and maximal: it does not
report any false alarms and it detects a maximal set of
true races from the observed incomplete trace. RDIT is
underpinned by a sound BarrierPair model that abstracts
away the missing events by capturing the invocation
data of their enclosing methods. By making the least
conservative abstraction that a missing method
introduces synchronization only when it has a memory
address in scope that overlaps with other events or
other missing methods, and by formulating maximal
thread causality as logical constraints, RDIT
guarantees to precisely detect races with maximal
capability. RDIT has been applied in seven real-world
large concurrent systems and has detected dozens of
true races with zero false alarms. Comparatively,
existing algorithms such as Happens-Before,
Causal-Precedes, and Maximal-Causality which are known
to be precise all report many false alarms when missing
synchronizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
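
For orientation, the classical happens-before core that such detectors build on fits in a few lines with vector clocks. RDIT's actual contribution, the BarrierPair abstraction of missing events, is beyond this sketch.

    # Precise dynamic race detection via vector clocks (happens-before).

    def detect_races(trace, nthreads):
        C = [[0] * nthreads for _ in range(nthreads)]   # per-thread clocks
        locks = {}                       # lock -> clock at last release
        last = {}                        # addr -> [(thread, clock, wrote)]
        races = []

        def happens_before(c1, c2):
            return all(a <= b for a, b in zip(c1, c2))

        for ev in trace:
            kind, t = ev[0], ev[1]
            C[t][t] += 1
            if kind == "acq" and ev[2] in locks:
                C[t] = [max(a, b) for a, b in zip(C[t], locks[ev[2]])]
            elif kind == "rel":
                locks[ev[2]] = list(C[t])
            elif kind in ("rd", "wr"):
                addr, wrote = ev[2], kind == "wr"
                for u, cu, u_wrote in last.get(addr, []):
                    if u != t and (wrote or u_wrote) and not happens_before(cu, C[t]):
                        races.append((addr, u, t))
                last.setdefault(addr, []).append((t, list(C[t]), wrote))
        return races

    racy = [("wr", 0, "p"), ("wr", 1, "p")]              # no ordering
    safe = [("wr", 0, "p"), ("rel", 0, "m"),
            ("acq", 1, "m"), ("wr", 1, "p")]             # ordered by lock m
    print(detect_races(racy, 2))   # -> [('p', 0, 1)]
    print(detect_races(safe, 2))   # -> []
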
@Article{Blum:2016:SMC,
author = "Ben Blum and Garth Gibson",
title = "Stateless model checking with data-race preemption
points",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "477--493",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984036",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Stateless model checking is a powerful technique for
testing concurrent programs, but suffers from
exponential state space explosion when the test input
parameters are too large. Several reduction techniques
can mitigate this explosion, but even after pruning
equivalent interleavings, the state space size is often
intractable. Most prior tools are limited to preempting
only on synchronization APIs, which reduces the space
further, but can miss unsynchronized thread
communication bugs. Data race detection, another
concurrency testing approach, focuses on suspicious
memory access pairs during a single test execution. It
avoids concerns of state space size, but may report
races that do not lead to observable failures, which
jeopardizes a user's willingness to use the analysis.
We present Quicksand, a new stateless model checking
framework which manages the exploration of many state
spaces using different preemption points. It uses state
space estimation to prioritize jobs most likely to
complete in a fixed CPU budget, and it incorporates
data-race analysis to add new preemption points on the
fly. Preempting threads during a data race's
instructions can automatically classify the race as
buggy or benign, and uncovers new bugs not reachable by
prior model checkers. It also enables full verification
of all possible schedules when every data race is
verified as benign within the CPU budget. In our
evaluation, Quicksand found 1.25x as many bugs and
verified 4.3x as many tests compared to prior model
checking approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Gollamudi:2016:AEE,
author = "Anitha Gollamudi and Stephen Chong",
title = "Automatic enforcement of expressive security policies
using enclaves",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "494--513",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984002",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hardware-based enclave protection mechanisms, such as
Intel's SGX, ARM's TrustZone, and Apple's Secure
Enclave, can protect code and data from powerful
low-level attackers. In this work, we use enclaves to
enforce strong application-specific information
security policies. We present IMP$_E$, a novel calculus
that captures the essence of SGX-like enclave
mechanisms, and show that a security-type system for
IMP$_E$ can enforce expressive confidentiality policies
(including erasure policies and delimited release
policies) against powerful low-level attackers,
including attackers that can arbitrarily corrupt
non-enclave code, and, under some circumstances,
corrupt enclave code. We present a translation from an
expressive security-typed calculus (that is not aware
of enclaves) to IMP$_E$. The translation automatically
places code and data into enclaves to enforce the
security policies of the source program.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Colin:2016:CTC,
author = "Alexei Colin and Brandon Lucia",
title = "{Chain}: tasks and channels for reliable intermittent
programs",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "514--530",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983995",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Energy harvesting computers enable general-purpose
computing using energy collected from their
environment. Energy-autonomy of such devices has great
potential, but their intermittent power supply poses a
challenge. Intermittent program execution compromises
progress and leaves state inconsistent. This work
describes Chain: a new model for programming
intermittent devices. A Chain program is a set of
programmer-defined tasks that compute and exchange data
through channels. Chain guarantees forward progress at
task granularity. A task is restartable and never sees
inconsistent state, because its input and output
channels are separated. Our system supports language
features for expressing advanced data exchange patterns
and for encapsulating reusable functionality. Chain
fundamentally differs from state-of-the-art
checkpointing approaches and does not incur the
associated overhead. We implement Chain as C language
extensions and a runtime library. We used Chain to
implement four applications: machine learning,
encryption, compression, and sensing. In experiments,
Chain ensured consistency where prior approaches failed
and improved throughput by 2-7x over the leading
state-of-the-art system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
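
The task/channel discipline can be mimicked to show why restarts are safe: a task reads only input channels and writes only separate output channels, so rerunning it after a simulated power failure never observes its own partial state. Chain itself is a set of C extensions; this Python rendering is purely conceptual.

    # Restartable tasks with separated input/output channels.

    channels = {"count_in": 0, "count_out": None}

    def chan_in(name):
        return channels[name]

    def chan_out(name, value):
        channels[name] = value

    attempts = {"n": 0}

    def task_increment():
        v = chan_in("count_in")            # inputs and outputs are separate
        attempts["n"] += 1
        if attempts["n"] % 2 == 1:         # inject a power failure on every
            raise RuntimeError("power failure")   # first attempt of this task
        chan_out("count_out", v + 1)       # partial writes never alias inputs
        return "task_swap"

    def task_swap():
        chan_out("count_in", chan_in("count_out"))
        return "task_increment"

    TASKS = {"task_increment": task_increment, "task_swap": task_swap}
    current = "task_increment"
    while chan_in("count_in") < 3:
        try:
            current = TASKS[current]()     # task transition = commit point
        except RuntimeError:
            pass                           # reboot: rerun the *same* task
    print(chan_in("count_in"))             # -> 3, correct despite failures
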
@Article{Bonetta:2016:GSM,
author = "Daniele Bonetta and Luca Salucci and Stefan Marr and
Walter Binder",
title = "{GEMs}: shared-memory parallel programming for
{Node.js}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "531--547",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984039",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "JavaScript is the most popular programming language
for client-side Web applications, and Node.js has
popularized the language for server-side computing,
too. In this domain, the minimal support for parallel
programming remains however a major limitation. In this
paper we introduce a novel parallel programming
abstraction called Generic Messages (GEMs). GEMs allow
one to combine message passing and shared-memory
parallelism, extending the classes of parallel
applications that can be built with Node.js. GEMs have
customizable semantics and enable several forms of
thread safety, isolation, and concurrency control. GEMs
are designed as convenient JavaScript abstractions that
expose high-level and safe parallelism models to the
developer. Experiments show that GEMs outperform
equivalent Node.js applications thanks to their usage
of shared memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Peters:2016:OCF,
author = "Arthur Michener Peters and David Kitchin and John A.
Thywissen and William R. Cook",
title = "{OrcO}: a concurrency-first approach to objects",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "548--567",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984022",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The majority of modern programming languages provide
concurrency and object-orientation in some form.
However, object-oriented concurrency remains cumbersome
in many situations. We introduce the language OrcO, Orc
with concurrent Objects, which enables a flexible style
of concurrent object-oriented programming. OrcO extends
the Orc programming language by adding abstractions for
programming-in-the-large; namely objects, classes, and
inheritance. OrcO objects are designed to be orthogonal
to concurrency, allowing the concurrent structure and
object structure of a program to evolve independently.
This paper describes OrcO's goals and design and
provides examples of how OrcO can be used to deftly
handle events, object management, and object
composition.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Ancona:2016:SSI,
author = "Davide Ancona and Andrea Corradi",
title = "Semantic subtyping for imperative object-oriented
languages",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "568--587",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983992",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Semantic subtyping is an approach for defining sound
and complete procedures to decide subtyping for
expressive types, including union and intersection
types; although it has been exploited especially in
functional languages for XML-based programming,
recently it has been partially investigated in the
context of object-oriented languages, and a sound and
complete subtyping algorithm has been proposed for
record types, but restricted to immutable fields, with
union and recursive types interpreted coinductively to
support cyclic objects. In this work we address the
problem of studying semantic subtyping for imperative
object-oriented languages, where fields can be mutable;
in particular, we add read/write field annotations to
record types, and, besides union, we consider
intersection types as well, while maintaining
coinductive interpretation of recursive types. In this
way, we get a richer notion of type with a flexible
subtyping relation, able to express a variety of type
invariants useful for enforcing static guarantees for
mutable objects. The addition of these features
radically changes the definition of subtyping, and,
hence, the corresponding decision procedure, and
surprisingly invalidates some subtyping laws that hold
in the functional setting. We propose an intuitive
model where mutable record values contain type
information to specify the values that can be correctly
stored in fields. Such a model, and the corresponding
subtyping rules, require particular care to avoid
circularity between coinductive judgments and their
negations which, by duality, have to be interpreted
inductively. A sound and complete subtyping algorithm
is provided, together with a prototype
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
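
For intuition, the read/write field annotations mentioned in the abstract
above admit the standard variance treatment: a field is covariant in the
type at which it may be read and contravariant in the type at which it may
be written. A minimal sketch of the corresponding record-subtyping rule, in
a plausible notation (the paper's own notation and rules may differ):

\[
  \frac{t_r \leq t_r' \qquad t_w' \leq t_w}
       {\{f : (t_r, t_w)\} \leq \{f : (t_r', t_w')\}}
\]

A read-only field recovers the covariance of the functional setting, while
a field read and written at a single type is far more constrained, which is
one way to see why subtyping laws that hold for immutable records fail once
mutation is added.
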
@Article{Brachthauser:2016:PFC,
author = "Jonathan Immanuel Brachth{\"a}user and Tillmann Rendel
and Klaus Ostermann",
title = "Parsing with first-class derivatives",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "588--606",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984026",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Brzozowski derivatives, well known in the context of
regular expressions, have recently been rediscovered to
give a simplified explanation to parsers of
context-free languages. We add derivatives as a novel
first-class feature to a standard parser combinator
language. First-class derivatives enable an inversion
of the control flow, allowing to implement modular
parsers for languages that previously required separate
pre-processing steps or cross-cutting modifications of
the parsers. We show that our framework offers new
opportunities for reuse and supports a modular
definition of interesting use cases of layout-sensitive
parsing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
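
The classical construction behind the abstract above is compact enough to
sketch. Below is a minimal Brzozowski-derivative regular-expression matcher
in Java; it illustrates only the underlying derivative operation, not the
paper's first-class parser combinators, and all names are ours:

// Minimal sketch of Brzozowski derivatives (the classical construction the
// paper builds on); this is not the paper's combinator library.
abstract class Regex {
    abstract boolean nullable();           // does the language contain ""?
    abstract Regex deriv(char c);          // the language after consuming c

    boolean matches(String s) {
        Regex r = this;
        for (int i = 0; i < s.length(); i++) r = r.deriv(s.charAt(i));
        return r.nullable();
    }

    static final Regex EMPTY = new Regex() {      // matches nothing
        boolean nullable() { return false; }
        Regex deriv(char c) { return EMPTY; }
    };
    static final Regex EPS = new Regex() {        // matches only ""
        boolean nullable() { return true; }
        Regex deriv(char c) { return EMPTY; }
    };
    static Regex chr(char x) {
        return new Regex() {
            boolean nullable() { return false; }
            Regex deriv(char c) { return c == x ? EPS : EMPTY; }
        };
    }
    static Regex alt(Regex a, Regex b) {
        return new Regex() {
            boolean nullable() { return a.nullable() || b.nullable(); }
            Regex deriv(char c) { return alt(a.deriv(c), b.deriv(c)); }
        };
    }
    static Regex seq(Regex a, Regex b) {
        return new Regex() {
            boolean nullable() { return a.nullable() && b.nullable(); }
            Regex deriv(char c) {
                Regex d = seq(a.deriv(c), b);
                return a.nullable() ? alt(d, b.deriv(c)) : d;
            }
        };
    }
    static Regex star(Regex a) {
        return new Regex() {
            boolean nullable() { return true; }
            Regex deriv(char c) { return seq(a.deriv(c), this); } // d(a*) = d(a)a*
        };
    }
}

For example, Regex.seq(Regex.chr('a'), Regex.star(Regex.chr('b')))
.matches("abbb") yields true: each character is consumed by one derivative
step, and acceptance is a final nullability check.
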
@Article{Kell:2016:MLE,
author = "Stephen Kell and Dominic P. Mulligan and Peter
Sewell",
title = "The missing link: explaining {ELF} static linking,
semantically",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "607--623",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983996",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Beneath the surface, software usually depends on
complex linker behaviour to work as intended. Even
linking {\tt hello\_world.c} is surprisingly involved,
and systems software such as {\tt libc} and operating
system kernels rely on a host of linker features. But
linking is poorly understood by working programmers and
has largely been neglected by language researchers. In
this paper we survey the many use-cases that linkers
                 support and the poorly specified ``linker speak'' by which
they are controlled: metadata in object files,
command-line options, and linker-script language. We
provide the first validated formalisation of a
realistic executable and linkable format (ELF), and
capture aspects of the Application Binary Interfaces
for four mainstream platforms (AArch64, AMD64, Power64,
and IA32). Using these, we develop an executable
specification of static linking, covering (among other
things) enough to link small C programs (we use the
example of bzip2) into a correctly running executable.
We provide our specification in Lem and Isabelle/HOL
forms. This is the first formal specification of
mainstream linking. We have used the Isabelle/HOL
version to prove a sample correctness property for one
case of AMD64 ABI relocation, demonstrating that the
specification supports formal proof, and as a first
step towards the much more ambitious goal of verified
linking. Our work should enable several novel strands
of research, including linker-aware verified
compilation and program analysis, and better languages
for controlling linking.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Rompf:2016:TSD,
author = "Tiark Rompf and Nada Amin",
title = "Type soundness for dependent object types {(DOT)}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "624--641",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984008",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scala's type system unifies aspects of ML modules,
object-oriented, and functional programming. The
Dependent Object Types (DOT) family of calculi has been
proposed as a new theoretic foundation for Scala and
similar expressive languages. Unfortunately, type
soundness has only been established for restricted
subsets of DOT. In fact, it has been shown that
important Scala features such as type refinement or a
subtyping relation with lattice structure break at
least one key metatheoretic property such as
environment narrowing or invertible subtyping
transitivity, which are usually required for a type
soundness proof. The main contribution of this paper is
to demonstrate how, perhaps surprisingly, even though
these properties are lost in their full generality, a
rich DOT calculus that includes recursive type
refinement and a subtyping lattice with intersection
types can still be proved sound. The key insight is
that subtyping transitivity only needs to be invertible
in code paths executed at runtime, with contexts
consisting entirely of valid runtime objects, whereas
inconsistent subtyping contexts can be permitted for
code that is never executed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Daloze:2016:ETS,
author = "Benoit Daloze and Stefan Marr and Daniele Bonetta and
Hanspeter M{\"o}ssenb{\"o}ck",
title = "Efficient and thread-safe objects for
dynamically-typed languages",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "642--659",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984001",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We are in the multi-core era. Dynamically-typed
languages are in widespread use, but their support for
multithreading still lags behind. One of the reasons is
that the sophisticated techniques they use to
efficiently represent their dynamic object models are
often unsafe in multithreaded environments. This paper
defines safety requirements for dynamic object models
in multithreaded environments. Based on these
requirements, a language-agnostic and thread-safe
object model is designed that maintains the efficiency
of sequential approaches. This is achieved by ensuring
that field reads do not require synchronization and
field updates only need to synchronize on objects
shared between threads. Basing our work on
JRuby+Truffle, we show that our safe object model has
zero overhead on peak performance for thread-local
objects and only 3\% average overhead on parallel
benchmarks where field updates require synchronization.
Thus, it can be a foundation for safe and efficient
multithreaded VMs for a wide range of dynamic
languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
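
A schematic rendering of the policy described above, with all names ours:
reads never synchronize, and writes take a lock only once the object has
been marked as shared between threads. The real JRuby+Truffle object model
relies on VM-level layout and publication guarantees that plain Java code
cannot fully reproduce.

// Schematic sketch only; not the paper's implementation.
final class DynObject {
    private volatile boolean shared = false;   // set when the object escapes
    private volatile String[] names = new String[0];
    private volatile Object[] storage = new Object[0];

    void markShared() { shared = true; }       // object published to another thread

    Object read(String name) {                 // reads never synchronize
        String[] ns = names; Object[] st = storage;
        for (int i = 0; i < ns.length; i++)
            if (ns[i].equals(name)) return st[i];
        return null;
    }

    void write(String name, Object value) {
        if (shared) {
            synchronized (this) { writeUnsync(name, value); } // shared: lock
        } else {
            writeUnsync(name, value);          // thread-local fast path: no lock
        }
    }

    private void writeUnsync(String name, Object value) {
        String[] ns = names; Object[] st = storage;
        for (int i = 0; i < ns.length; i++)
            if (ns[i].equals(name)) { st[i] = value; return; }
        String[] ns2 = java.util.Arrays.copyOf(ns, ns.length + 1);
        Object[] st2 = java.util.Arrays.copyOf(st, st.length + 1);
        ns2[ns.length] = name; st2[st.length] = value;
        storage = st2; names = ns2;            // publish fully-built arrays last
    }
}
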
@Article{Chapman:2016:HSH,
author = "Keith Chapman and Antony L. Hosking and J. Eliot B.
Moss",
title = "Hybrid {STM\slash HTM} for nested transactions on
{OpenJDK}",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "660--676",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984029",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactional memory (TM) has long been advocated as a
promising pathway to more automated concurrency control
for scaling concurrent programs running on parallel
hardware. Software TM (STM) has the benefit of being
able to run general transactional programs, but at the
significant cost of overheads imposed to log memory
accesses, mediate access conflicts, and maintain other
transaction metadata. Recently, hardware manufacturers
have begun to offer commodity hardware TM (HTM) support
in their processors wherein the transaction metadata is
maintained ``for free'' in hardware. However, HTM
approaches are only best-effort: they cannot
successfully run all transactional programs, whether
because of hardware capacity issues (causing large
transactions to fail), or compatibility restrictions on
the processor instructions permitted within hardware
transactions (causing transactions that execute those
instructions to fail). In such cases, programs must
include failure-handling code to attempt the
computation by some other software means, since
retrying the transaction would be futile. Thus, a
canonical use of HTM is lock elision: replacing lock
regions with transactions, retrying some number of
times in the case of conflicts, but falling back to
locking when HTM fails for other reasons. Here, we
describe how software and hardware schemes can combine
seamlessly into a hybrid system in support of
transactional programs, allowing use of low-cost HTM
when it works, but reverting to STM when it doesn't. We
describe heuristics used to make this choice
dynamically and automatically, but allowing the
transition back to HTM opportunistically. Our
implementation is for an extension of Java having
syntax for both open and closed nested transactions,
and boosting, running on the OpenJDK, with dynamic
injection of STM mechanisms (into code variants used
under STM) and HTM instructions (into code variants
                 used under HTM). The two schemes are compatible,
                 allowing different threads to run concurrently under either
mechanism, while preserving transaction safety. Using a
standard synthetic benchmark we demonstrate that HTM
offers significant acceleration of both closed and open
nested transactions, while yielding parallel scaling up
to the limits of the hardware, whereupon scaling in
software continues but with the penalty to throughput
imposed by software mechanisms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
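
The canonical HTM-first/STM-fallback policy sketched in the abstract can be
rendered as pseudocode. Htm and Stm below are invented stand-ins: real
hardware transactions are reached via JIT-injected instructions, not a Java
API, so these stubs make the sketch compile but only ever exercise the
fallback path.

// Schematic sketch, not the paper's implementation.
final class HybridTm {
    static class Htm {
        static boolean begin() { return false; }            // pretend HTM never starts
        static void commit() {}
        static boolean abortWasConflict() { return false; }
    }
    static class Stm {
        static <T> T run(java.util.function.Supplier<T> body) {
            synchronized (Stm.class) { return body.get(); } // global-lock stand-in for STM
        }
    }

    static final int HTM_RETRIES = 3;

    static <T> T atomic(java.util.function.Supplier<T> body) {
        for (int attempt = 0; attempt < HTM_RETRIES; attempt++) {
            if (Htm.begin()) {                  // hardware transaction started
                T result = body.get();
                Htm.commit();
                return result;
            }
            // Conflicts are worth retrying in HTM; capacity overflows or
            // illegal instructions are not, so fall back immediately.
            if (!Htm.abortWasConflict()) break;
        }
        return Stm.run(body);                   // software path always succeeds
    }
}
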
@Article{Bhandari:2016:MFR,
author = "Kumud Bhandari and Dhruva R. Chakrabarti and Hans-J.
Boehm",
title = "{Makalu}: fast recoverable allocation of non-volatile
memory",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "677--694",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984019",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Byte addressable non-volatile memory (NVRAM) is likely
to supplement, and perhaps eventually replace, DRAM.
Applications can then persist data structures directly
in memory instead of serializing them and storing them
onto a durable block device. However, failures during
execution can leave data structures in NVRAM
unreachable or corrupt. In this paper, we present
Makalu, a system that addresses non-volatile memory
management. Makalu offers an integrated allocator and
recovery-time garbage collector that maintains internal
consistency, avoids NVRAM memory leaks, and is
efficient, all in the face of failures. We show that a
careful allocator design can support a less restrictive
and a much more familiar programming model than
existing persistent memory allocators. Our allocator
                 significantly reduces the per-allocation persistence
overhead by lazily persisting non-essential metadata
and by employing a post-failure recovery-time garbage
collector. Experimental results show that the resulting
online speed and scalability of our allocator are
comparable to well-known transient allocators, and
significantly better than state-of-the-art persistent
allocators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Nunez:2016:PGC,
author = "Diogenes Nunez and Samuel Z. Guyer and Emery D.
Berger",
title = "Prioritized garbage collection: explicit {GC} support
for software caches",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "695--710",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984028",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmers routinely trade space for time to increase
performance, often in the form of caching or
memoization. In managed languages like Java or
JavaScript, however, this space-time tradeoff is
complex. Using more space translates into higher
garbage collection costs, especially at the limit of
available memory. Existing runtime systems provide
limited support for space-sensitive algorithms, forcing
programmers into difficult and often brittle choices
about provisioning. This paper presents prioritized
garbage collection, a cooperative programming language
and runtime solution to this problem. Prioritized GC
provides an interface similar to soft references,
called priority references, which identify objects that
the collector can reclaim eagerly if necessary. The key
difference is an API for defining the policy that
governs when priority references are cleared and in
what order. Application code specifies a priority value
for each reference and a target memory bound. The
collector reclaims references, lowest priority first,
until the total memory footprint of the cache fits
within the bound. We use this API to implement a
space-aware least-recently-used (LRU) cache, called a
Sache, that is a drop-in replacement for existing
caches, such as Google's Guava library. The garbage
collector automatically grows and shrinks the Sache in
response to available memory and workload with minimal
provisioning information from the programmer. Using a
Sache, it is almost impossible for an application to
experience a memory leak, memory pressure, or an
out-of-memory crash caused by software caching.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
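
A user-level approximation of the priority-reference idea, with invented
names: each entry carries a priority, and the cache is shrunk
lowest-priority-first whenever it exceeds a bound. The paper's design does
this inside the collector, keyed to actual memory pressure rather than an
entry count.

import java.util.HashMap;
import java.util.PriorityQueue;

// Invented, user-level approximation; not the paper's GC-integrated API.
final class Sache<K, V> {
    private record Entry<T>(long priority, T key) {}

    private final PriorityQueue<Entry<K>> heap =
        new PriorityQueue<>((a, b) -> Long.compare(a.priority(), b.priority()));
    private final HashMap<K, V> values = new HashMap<>();
    private final int maxEntries;              // stand-in for a memory bound

    Sache(int maxEntries) { this.maxEntries = maxEntries; }

    synchronized void put(K key, V value, long priority) {
        heap.add(new Entry<>(priority, key));
        values.put(key, value);
        while (values.size() > maxEntries && !heap.isEmpty()) {
            Entry<K> lowest = heap.poll();     // reclaim lowest priority first
            values.remove(lowest.key());       // (stale heap entries may evict
        }                                      // a re-added key early; fine here)
    }

    synchronized V get(K key) { return values.get(key); }
}
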
@Article{Steimann:2016:CRA,
author = "Friedrich Steimann and J{\"o}rg Hagemann and Bastian
Ulke",
title = "Computing repair alternatives for malformed programs
using constraint attribute grammars",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "711--730",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984007",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Attribute grammars decorate the nodes of a program's
parse tree with attributes whose values are defined by
equations encoding the (static) semantics of a
programming language. We show how replacing the
equations of an attribute grammar with equivalent
constraints that can be solved by a constraint solver
allows us to compute repairs of a malformed program
solely from a specification that was originally
designed for checking its well-formedness. We present
two repair modes --- shallow and deep fixing --- whose
computed repair alternatives are guaranteed to repair
every error on which they are invoked. While shallow
fixing may introduce new errors, deep fixing never
does; to make it tractable, we implement it using
neighborhood search. We demonstrate the feasibility of
our approach by implementing it on top of ExtendJ, an
attribute grammar based Java compiler, and by applying
it to an example from the Java EE context, detecting
and fixing well-formedness errors (both real and
injected) in a body of 14 open-source subject
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Raychev:2016:PMC,
author = "Veselin Raychev and Pavol Bielik and Martin Vechev",
title = "Probabilistic model for code with decision trees",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "731--747",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984041",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper we introduce a new approach for learning
precise and general probabilistic models of code based
on decision tree learning. Our approach directly
benefits an emerging class of statistical programming
tools which leverage probabilistic models of code
learned over large codebases (e.g., GitHub) to make
predictions about new programs (e.g., code completion,
repair, etc). The key idea is to phrase the problem of
learning a probabilistic model of code as learning a
decision tree in a domain specific language over
abstract syntax trees (called TGen). This allows us to
condition the prediction of a program element on a
dynamically computed context. Further, our problem
formulation enables us to easily instantiate known
                 decision tree learning algorithms such as ID3, but also
                 to obtain new, previously unexplored variants, which we
                 refer to as ID3+ and E13, that outperform ID3 in
                 prediction accuracy. Our approach is general and can be
used to learn a probabilistic model of any programming
language. We implemented our approach in a system
called Deep3 and evaluated it for the challenging task
of learning probabilistic models of JavaScript and
Python. Our experimental results indicate that Deep3
predicts elements of JavaScript and Python code with
precision above 82\% and 69\%, respectively. Further,
Deep3 often significantly outperforms state-of-the-art
approaches in overall prediction accuracy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
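
The core step of any ID3-style learner like those the paper instantiates is
choosing the split with maximal information gain. A self-contained sketch
of that step (TGen programs and code-specific contexts are beyond it; names
are ours):

import java.util.*;
import java.util.function.Predicate;

final class Id3Step {
    // Shannon entropy of a non-empty list of class labels, in bits.
    static double entropy(List<String> labels) {
        Map<String, Integer> counts = new HashMap<>();
        for (String l : labels) counts.merge(l, 1, Integer::sum);
        double h = 0, n = labels.size();
        for (int c : counts.values()) {
            double p = c / n;
            h -= p * (Math.log(p) / Math.log(2));
        }
        return h;
    }

    // Information gain of splitting the labeled items by a boolean predicate.
    static <T> double gain(List<T> items, List<String> labels, Predicate<T> split) {
        List<String> yes = new ArrayList<>(), no = new ArrayList<>();
        for (int i = 0; i < items.size(); i++)
            (split.test(items.get(i)) ? yes : no).add(labels.get(i));
        double n = labels.size();
        return entropy(labels)
             - (yes.size() / n) * entropy(yes)
             - (no.size() / n) * entropy(no);
    }
}

ID3 builds its tree greedily by picking, at each node, the predicate with
the highest such gain and recursing on the two partitions.
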
@Article{Barman:2016:RWA,
author = "Shaon Barman and Sarah Chasins and Rastislav Bodik and
Sumit Gulwani",
title = "{Ringer}: web automation by demonstration",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "748--764",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984020",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With increasing amounts of data available on the web
and a diverse range of users interested in
programmatically accessing that data, web automation
must become easier. Automation helps users complete
many tedious interactions, such as scraping data,
completing forms, or transferring data between
websites. However, writing web automation scripts
typically requires an expert programmer because the
writer must be able to reverse engineer the target
webpage. We have built a record and replay tool,
Ringer, that makes web automation accessible to
non-coders. Ringer takes a user demonstration as input
and creates a script that interacts with the page as a
user would. This approach makes Ringer scripts more
robust to webpage changes because user-facing
interfaces remain relatively stable compared to the
underlying webpage implementations. We evaluated our
approach on benchmarks recorded on real webpages and
found that it replayed 4x more benchmarks than a
state-of-the-art replay tool.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Weitz:2016:SVB,
author = "Konstantin Weitz and Doug Woos and Emina Torlak and
Michael D. Ernst and Arvind Krishnamurthy and Zachary
Tatlock",
title = "Scalable verification of {Border Gateway Protocol}
configurations with an {SMT} solver",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "765--780",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984012",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Internet Service Providers (ISPs) use the Border
Gateway Protocol (BGP) to announce and exchange routes
for delivering packets through the internet. ISPs must
carefully configure their BGP routers to ensure traffic
is routed reliably and securely. Correctly configuring
BGP routers has proven challenging in practice, and
misconfiguration has led to worldwide outages and
traffic hijacks. This paper presents Bagpipe, a system
that enables ISPs to declaratively express BGP policies
and that automatically verifies that router
configurations implement such policies. The novel
initial network reduction soundly reduces policy
verification to a search for counterexamples in a
finite space. An SMT-based symbolic execution engine
performs this search efficiently. Bagpipe reduces the
size of its search space using predicate abstraction
and parallelizes its search using symbolic variable
hoisting. Bagpipe's policy specification language is
expressive: we expressed policies inferred from real AS
configurations, policies from the literature, and
policies for 10 Juniper TechLibrary configuration
scenarios. Bagpipe is efficient: we ran it on three
ASes with a total of over 240,000 lines of Cisco and
Juniper BGP configuration. Bagpipe is effective: it
revealed 19 policy violations without issuing any false
positives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Loncaric:2016:PFT,
author = "Calvin Loncaric and Satish Chandra and Cole
Schlesinger and Manu Sridharan",
title = "A practical framework for type inference error
explanation",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "781--799",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983994",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many languages have support for automatic type
inference. But when inference fails, the reported error
messages can be unhelpful, highlighting a code location
far from the source of the problem. Several lines of
work have emerged proposing error reports derived from
correcting sets: a set of program points that, when
fixed, produce a well-typed program. Unfortunately,
these approaches are tightly tied to specific
languages; targeting a new language requires encoding a
type inference algorithm for the language in a custom
constraint system specific to the error reporting tool.
                 We show how to produce correcting-set-based error
                 reports by leveraging existing type inference
                 implementations. This eases the burden of adoption
                 and, because type inference algorithms tend to be
                 efficient in practice, produces error reports of
                 quality comparable to those of similar tools, orders
                 of magnitude faster. Many type inference algorithms are already
formulated as dual phases of type constraint generation
and solving; rather than (re)implementing type
inference in an error explanation tool, we isolate the
solving phase and treat it as an oracle for solving
typing constraints. Given any set of typing
constraints, error explanation proceeds by iteratively
removing conflicting constraints from the initial
constraint set until discovering a subset on which the
solver succeeds; the constraints removed form a
correcting set. Our approach is agnostic to the
semantics of any particular language or type system,
instead leveraging the existing type inference engine
to give meaning to constraints.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
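
One simple instantiation of the oracle-based loop described above, with the
Oracle interface invented here: grow a satisfiable constraint set one
constraint at a time, and let everything the solver rejects form the
correcting set. The paper's iteration strategy is more refined, but the
shape is the same.

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

// The existing type inference engine, treated as a yes/no oracle.
interface Oracle<C> { boolean solvable(Collection<C> constraints); }

final class CorrectingSet {
    // Returns a (not necessarily minimum) correcting set: constraints whose
    // removal leaves a set the solver accepts, i.e., program points whose
    // repair would make the program well typed.
    static <C> List<C> find(List<C> constraints, Oracle<C> oracle) {
        List<C> kept = new ArrayList<>();
        List<C> removed = new ArrayList<>();
        for (C c : constraints) {
            kept.add(c);
            if (!oracle.solvable(kept)) {
                kept.remove(kept.size() - 1);  // c conflicts with what we kept
                removed.add(c);
            }
        }
        return removed;
    }
}
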
@Article{Kell:2016:DDT,
author = "Stephen Kell",
title = "Dynamically diagnosing type errors in unsafe code",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "800--819",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2983998",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Existing approaches for detecting type errors in
unsafe languages are limited. Static analysis methods
are imprecise, and often require source-level changes,
while most dynamic methods check only memory properties
(bounds, liveness, etc.), owing to a lack of run-time
type information. This paper describes libcrunch, a
system for binary-compatible run-time type checking of
unmodified unsafe code, currently focusing on C.
Practical experience shows that our prototype
implementation is easily applicable to many real
codebases without source-level modification, correctly
flags programmer errors with a very low rate of false
positives, offers a very low run-time overhead, and
covers classes of error caught by no previously
existing tool.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Long:2016:FCE,
author = "Yuheng Long and Yu David Liu and Hridesh Rajan",
title = "First-class effect reflection for effect-guided
programming",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "820--837",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984037",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces a novel type-and-effect
calculus, first-class effects, where the computational
effect of an expression can be programmatically
reflected, passed around as values, and analyzed at run
time. A broad range of designs ``hard-coded'' in
                 existing effect-guided analyses --- from thread
                 scheduling and version-consistent software updating to
                 data zeroing --- can be naturally supported through the
programming abstractions. The core technical
development is a type system with a number of features,
including a hybrid type system that integrates static
and dynamic effect analyses, a refinement type system
to verify application-specific effect management
                 properties, and a double-bounded type system that
                 computes both an over-approximation and an
                 under-approximation of effects. We introduce and establish a
notion of soundness called trace consistency, defined
in terms of how the effect and trace correspond. The
                 property sheds foundational light on ``good''
first-class effect programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Amin:2016:JST,
author = "Nada Amin and Ross Tate",
title = "{Java} and {Scala}'s type systems are unsound: the
existential crisis of null pointers",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "838--848",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984004",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present short programs that demonstrate the
unsoundness of Java and Scala's current type systems.
In particular, these programs provide parametrically
polymorphic functions that can turn any type into any
type without (down)casting. Fortunately, parametric
polymorphism was not integrated into the Java Virtual
Machine (JVM), so these examples do not demonstrate any
unsoundness of the JVM. Nonetheless, we discuss broader
implications of these findings on the field of
programming languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
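
The counterexamples are short enough to reconstruct; the following Java
program is our recollection of the paper's style of example and may differ
from the published version in detail. It contains no casts, yet crashes
with a ClassCastException, because null inhabits an otherwise uninhabitable
constraint type:

// Reconstruction from memory of the paper's Java counterexample; the
// published program may differ. No cast appears in the source, yet main
// throws a ClassCastException at run time.
class Unsound {
    static class Constrain<A, B extends A> {}

    static class Bind<A> {
        <B extends A> A upcast(Constrain<A, B> constrain, B b) { return b; }
    }

    static <T, U> U coerce(T t) {
        // null inhabits Constrain<U, ? super T> even though no non-null
        // value could; wildcard capture then lets t pass as a U.
        Constrain<U, ? super T> constrain = null;
        Bind<U> bind = new Bind<U>();
        return bind.upcast(constrain, t);
    }

    public static void main(String[] args) {
        String zero = Unsound.<Integer, String>coerce(0);
    }
}
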
@Article{Sun:2016:FCB,
author = "Chengnian Sun and Vu Le and Zhendong Su",
title = "Finding compiler bugs via live code mutation",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "849--863",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984038",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Validating optimizing compilers is challenging because
it is hard to generate valid test programs (i.e., those
that do not expose any undefined behavior). Equivalence
Modulo Inputs (EMI) is an effective, promising
methodology to tackle this problem. Given a test
program with some inputs, EMI mutates the program to
derive variants that are semantically equivalent w.r.t.
these inputs. The state-of-the-art instantiations of
EMI are Orion and Athena, both of which rely on
deleting code from or inserting code into code regions
that are not executed under the inputs. Although both
have demonstrated their ability in finding many bugs in
GCC and LLVM, they are still limited due to their
mutation strategies that operate only on dead code
regions. This paper presents a novel EMI technique that
allows mutation in the entire program (i.e., both live
and dead regions). By removing the restriction of
mutating only the dead regions, our technique
significantly increases the EMI variant space. It also
helps to more thoroughly stress test compilers as
compilers must optimize mutated live code, whereas
mutated dead code might be eliminated. Finally, our
technique also makes compiler bugs more noticeable as
miscompilations on mutated dead code may not be
observable. We have realized the proposed technique in
Hermes. The evaluation demonstrates Hermes's
effectiveness. In 13 months, Hermes found 168
confirmed, valid bugs in GCC and LLVM, of which 132
have already been fixed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Shan:2016:FRR,
author = "Zhiyong Shan and Tanzirul Azim and Iulian Neamtiu",
title = "Finding resume and restart errors in {Android}
applications",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "864--880",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984011",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Smartphone apps create and handle a large variety of
``instance'' data that has to persist across runs, such
as the current navigation route, workout results,
antivirus settings, or game state. Due to the nature of
the smartphone platform, an app can be paused, sent
into background, or killed at any time. If the instance
data is not saved and restored between runs, in
addition to data loss, partially-saved or corrupted
data can crash the app upon resume or restart. While
smartphone platforms offer API support for data-saving
and data-retrieving operations, the use of this API is
ad-hoc: left to the programmer, rather than enforced by
the compiler. We have observed that several categories
of bugs---including data loss, failure to
resume/restart or resuming/restarting in the wrong
state---are due to incorrect handling of instance data
and are easily triggered by just pressing the `Home' or
`Back' buttons. To help address this problem, we have
constructed a tool chain for Android (the KREfinder
static analysis and the KREreproducer input generator)
that helps find and reproduce such incorrect handling.
We have evaluated our approach by running the static
analysis on 324 apps, of which 49 were further analyzed
manually. Results indicate that our approach is (i)
effective, as it has discovered 49 bugs, including in
popular Android apps, and (ii) efficient, completing on
average in 61 seconds per app. More generally, our
approach helps determine whether an app saves too much
or too little state.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Zuo:2016:LOF,
author = "Zhiqiang Zuo and Lu Fang and Siau-Cheng Khoo and
Guoqing Xu and Shan Lu",
title = "Low-overhead and fully automated statistical debugging
with abstraction refinement",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "881--896",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984005",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cooperative statistical debugging is an effective
approach for diagnosing production-run failures. To
quickly identify failure predictors from the huge
program predicate space, existing techniques rely on
random or heuristics-guided predicate sampling at the
user side. However, none of them can satisfy the
requirements of low cost, low diagnosis latency, and
high diagnosis quality simultaneously, which are all
indispensable for statistical debugging to be
practical. This paper presents a new technique that
tackles the above challenges. We formulate the
technique as an instance of abstraction refinement,
where efficient abstract-level profiling is first
applied to the whole program and its execution brings
information that can pinpoint suspicious coarse-grained
entities that need to be refined. The refinement
profiles a corresponding set of fine-grained entities,
and generates feedback that determines what to prune
and what to refine next. The process is fully
automated, and more importantly, guided by a
mathematically rigorous analysis that guarantees that
our approach produces the same debugging results as an
exhaustive analysis in deterministic settings. We have
                 implemented this technique for both C and Java, on both
                 a single machine and a distributed system. A thorough
evaluation demonstrates that our approach yields (1) an
order of magnitude reduction in the user-side runtime
overhead even compared to a sampling-based approach and
(2) two orders of magnitude reduction in the size of
data transferred over the network, completely
automatically without sacrificing any debugging
capability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Bavishi:2016:PRA,
author = "Rohan Bavishi and Awanish Pandey and Subhajit Roy",
title = "To be precise: regression aware debugging",
journal = j-SIGPLAN,
volume = "51",
number = "10",
pages = "897--915",
month = oct,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3022671.2984014",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:13 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Bounded model checking based debugging solutions
search for mutations of program expressions that
produce the expected output for a currently failing
test. However, the current localization tools are not
regression aware: they do not use information from the
passing tests in their localization formula. On the
other hand, the current repair tools attempt to
guarantee regression freedom: when provided with a set
of passing tests, they guarantee that none of these
tests can break due to the suggested repair patch,
thereby constructing a large repair formula. In this
paper, we propose regression awareness as a means to
improve the quality of localization and to scale
repair. To enable regression awareness, we summarize
the proof of correctness of each passing test by
computing Craig Interpolants over a symbolic encoding
of the passing execution, and use these summaries as
additional soft constraints while synthesizing altered
executions corresponding to failing tests. Intuitively,
these additional constraints act as roadblocks, thereby
discouraging executions that may damage the proof of a
passing test. We use a partial MAXSAT solver to relax
the proofs in a systematic way, and use a ranking
function that penalizes mutations that damage the
existing proofs. We have implemented our algorithms
into a tool, TINTIN, that enables regression aware
localization and repair. For localizations, our
strategy is effective in extracting a superior ranking
of suspicious locations: on a set of 52 different
versions across 12 different programs spanning three
                 benchmark suites, TINTIN reduces developer effort by
                 almost 45\% (in terms of the locations that
must be examined by a developer to reach the
ground-truth repair) in the worst case and 27\% in the
average case over existing techniques. For automated
repairs, on our set of benchmarks, TINTIN achieves a
2.3X speedup over existing techniques without
sacrificing much on the ranking of the repair patches:
the ground-truth repair appears as the topmost
suggestion in more than 70\% of our benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "OOPSLA '16 conference proceedings.",
}
@Article{Lifflander:2017:CLO,
author = "Jonathan Lifflander and Sriram Krishnamoorthy",
title = "Cache locality optimization for recursive programs",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "1--16",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062385",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an approach to optimize the cache locality
for recursive programs by dynamically
splicing---recursively interleaving---the execution of
distinct function invocations. By utilizing data effect
annotations, we identify concurrency and data reuse
opportunities across function invocations and
interleave them to reduce reuse distance. We present
algorithms that efficiently track effects in recursive
programs, detect interference and dependencies, and
interleave execution of function invocations using
user-level (non-kernel) lightweight threads. To enable
multi-core execution, a program is parallelized using a
nested fork/join programming model. Our cache
optimization strategy is designed to work in the
context of a random work stealing scheduler. We present
an implementation using the MIT Cilk framework that
demonstrates significant improvements in sequential and
parallel performance, competitive with a
state-of-the-art compile-time optimizer for loop
programs and a domain-specific optimizer for stencil
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Saarikivi:2017:FEC,
author = "Olli Saarikivi and Margus Veanes and Todd Mytkowicz
and Madan Musuvathi",
title = "Fusing effectful comprehensions",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "17--32",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062362",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "List comprehensions provide a powerful abstraction
mechanism for expressing computations over ordered
collections of data declaratively without having to use
explicit iteration constructs. This paper puts forth
effectful comprehensions as an elegant way to describe
list comprehensions that incorporate loop-carried
state. This is motivated by operations such as
compression/decompression and
serialization/deserialization that are common in
log/data processing pipelines and require loop-carried
state when processing an input stream of data. We build
on the underlying theory of symbolic transducers to
fuse pipelines of effectful comprehensions into a
single representation, from which efficient code can be
generated. Using background theory reasoning with an
SMT solver, our fusion and subsequent reachability
based branch elimination algorithms can significantly
reduce the complexity of the fused pipelines. Our
implementation shows significant speedups over
reasonable hand-written code (3.4 $ \times $, on
                 average) and a traditionally fused version of the
pipeline (2.6 $ \times $, on average) for a variety of
examples, including scenarios for extracting fields
with regular expressions, processing XML with XPath,
and running queries over encoded data.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Ding:2017:GTD,
author = "Yufei Ding and Lin Ning and Hui Guan and Xipeng Shen",
title = "Generalizations of the theory and deployment of
triangular inequality for compiler-based strength
reduction",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "33--48",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062377",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Triangular Inequality (TI) has been used in many
manual algorithm designs to achieve good efficiency in
solving some distance calculation-based problems. This
paper presents our generalization of the idea into a
compiler optimization technique, named TI-based
strength reduction. The generalization consists of
three parts. The first is the establishment of the
theoretic foundation of this new optimization via the
development of a new form of TI named Angular
Triangular Inequality, along with several fundamental
theorems. The second is the revealing of the properties
of the new forms of TI and the proposal of guided TI
adaptation, a systematic method to address the
difficulties in effective deployments of TI
optimizations. The third is an integration of the new
optimization technique in an open-source compiler.
Experiments on a set of data mining and machine
learning algorithms show that the new technique can
speed up the standard implementations by as much as
134X and 46X on average for distance-related problems,
outperforming previous TI-based optimizations by 2.35X
on average. It also extends the applicability of
                 TI-based optimizations to vector-related problems,
                 producing tens-fold speedups.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
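
The classic triangle-inequality pruning that the paper generalizes is easy
to state: d(q,c) >= |d(q,p) - d(p,c)|, so a candidate whose cheap lower
bound already exceeds the best distance found so far can be skipped without
computing d(q,c). A small sketch (the angular TI and the compiler
integration are beyond it):

final class TiPrune {
    static double dist(double[] a, double[] b) {
        double s = 0;
        for (int i = 0; i < a.length; i++) { double d = a[i] - b[i]; s += d * d; }
        return Math.sqrt(s);
    }

    // Nearest neighbor of q, given distances from a fixed pivot to each
    // point (pivotDist[i] = dist(pivot, points[i]), computed once up front).
    static int nearest(double[] q, double[][] points, double[] pivot, double[] pivotDist) {
        double dqp = dist(q, pivot);           // the only mandatory distance
        int best = -1;
        double bestD = Double.POSITIVE_INFINITY;
        for (int i = 0; i < points.length; i++) {
            double lower = Math.abs(dqp - pivotDist[i]); // TI lower bound
            if (lower >= bestD) continue;                // pruned: cannot win
            double d = dist(q, points[i]);               // full computation
            if (d < bestD) { bestD = d; best = i; }
        }
        return best;
    }
}
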
@Article{Menendez:2017:AID,
author = "David Menendez and Santosh Nagarakatte",
title = "{Alive-Infer}: data-driven precondition inference for
peephole optimizations in {LLVM}",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "49--63",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062372",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Peephole optimizations are a common source of compiler
bugs. Compiler developers typically transform an
incorrect peephole optimization into a valid one by
strengthening the precondition. This process is
challenging and tedious. This paper proposes
Alive-Infer, a data-driven approach that infers
preconditions for peephole optimizations expressed in
Alive. Alive-Infer generates positive and negative
examples for an optimization, enumerates predicates
on-demand, and learns a set of predicates that separate
the positive and negative examples. Alive-Infer repeats
this process until it finds a precondition that ensures
the validity of the optimization. Alive-Infer reports
both a weakest precondition and a set of succinct
partial preconditions to the developer. Our prototype
generates preconditions that are weaker than LLVM's
preconditions for 73 optimizations in the Alive suite.
We also demonstrate the applicability of this technique
to generalize 54 optimization patterns generated by
Souper, an LLVM IR-based superoptimizer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Yessenov:2017:DAD,
author = "Kuat Yessenov and Ivan Kuraj and Armando
Solar-Lezama",
title = "{DemoMatch}: {API} discovery from demonstrations",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "64--78",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062386",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce DemoMatch, a tool for API discovery that
allows the user to discover how to implement
functionality using a software framework by
demonstrating the functionality in existing
applications built with the same framework. DemoMatch
matches the demonstrations against a database of
execution traces called Semeru and generates code
snippets explaining how to use the functionality. We
evaluated DemoMatch on several case studies involving
Java Swing and Eclipse RCP.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{David:2017:SBT,
author = "Yaniv David and Nimrod Partush and Eran Yahav",
title = "Similarity of binaries through re-optimization",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "79--94",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062387",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a scalable approach for establishing
similarity between stripped binaries (with no debug
information). The main challenge in binary similarity,
is to establish similarity even when the code has been
compiled using different compilers, with different
optimization levels, or targeting different
architectures. Overcoming this challenge, while
avoiding false positives, is invaluable to the process
of reverse engineering and the process of locating
vulnerable code. We present a technique that is
scalable and precise, as it alleviates the need for
heavyweight semantic comparison by performing
out-of-context re-optimization of procedure fragments.
It works by decomposing binary procedures to comparable
fragments and transforming them to a canonical,
normalized form using the compiler optimizer, which
enables finding equivalent fragments through simple
syntactic comparison. We use a statistical framework
built by analyzing samples collected ``in the wild'' to
generate a global context that quantifies the
significance of each pair of fragments, and uses it to
lift pairwise fragment equivalence to whole procedure
similarity. We have implemented our technique in a tool
called {\tt GitZ} and performed an extensive
evaluation. We show that {\tt GitZ} is able to perform
millions of comparisons efficiently, and find
similarity with high accuracy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Bastani:2017:SPI,
author = "Osbert Bastani and Rahul Sharma and Alex Aiken and
Percy Liang",
title = "Synthesizing program input grammars",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "95--110",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062349",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an algorithm for synthesizing a
context-free grammar encoding the language of valid
program inputs from a set of input examples and
blackbox access to the program. Our algorithm addresses
shortcomings of existing grammar inference algorithms,
which both severely overgeneralize and are
prohibitively slow. Our implementation, GLADE,
leverages the grammar synthesized by our algorithm to
fuzz test programs with structured inputs. We show that
GLADE substantially increases the incremental coverage
on valid inputs compared to two baseline fuzzers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Huang:2017:CMC,
author = "Daniel Huang and Jean-Baptiste Tristan and Greg
Morrisett",
title = "Compiling {Markov} chain {Monte Carlo} algorithms for
probabilistic modeling",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "111--125",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062375",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The problem of probabilistic modeling and inference,
                 at a high level, can be viewed as constructing a
                 (model, query, inference) tuple, where an inference
algorithm implements a query on a model. Notably, the
derivation of inference algorithms can be a difficult
and error-prone task. Hence, researchers have explored
how ideas from probabilistic programming can be
applied. In the context of constructing these tuples,
probabilistic programming can be seen as taking a
language-based approach to probabilistic modeling and
inference. For instance, by using (1) appropriate
languages for expressing models and queries and (2)
devising inference techniques that operate on encodings
of models (and queries) as program expressions, the
task of inference can be automated. In this paper, we
describe a compiler that transforms a probabilistic
model written in a restricted modeling language and a
query for posterior samples given observed data into a
Markov Chain Monte Carlo (MCMC) inference algorithm
that implements the query. The compiler uses a sequence
of intermediate languages (ILs) that guide it in
gradually and successively refining a declarative
specification of a probabilistic model and the query
into an executable MCMC inference algorithm. The
compilation strategy produces composable MCMC
algorithms for execution on a CPU or GPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
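
The kind of executable inference code such a compiler ultimately emits can
be illustrated with a tiny Metropolis-Hastings kernel; the compiler's
contribution is deriving the log-density and the proposal from the model
source, which are hand-written in this sketch.

import java.util.Random;

final class Mh {
    interface LogDensity { double at(double x); }

    // Random-walk Metropolis-Hastings: accept a proposal y with probability
    // min(1, p(y)/p(x)), computed in log space for numerical stability.
    static double[] sample(LogDensity logTarget, double init, int n, double step, long seed) {
        Random rng = new Random(seed);
        double[] xs = new double[n];
        double x = init, lx = logTarget.at(x);
        for (int i = 0; i < n; i++) {
            double y = x + step * rng.nextGaussian();   // symmetric proposal
            double ly = logTarget.at(y);
            if (Math.log(rng.nextDouble()) < ly - lx) { x = y; lx = ly; }
            xs[i] = x;
        }
        return xs;
    }

    public static void main(String[] args) {
        // Stand-in target: standard normal, log p(x) = -x^2/2 up to a constant.
        double[] draws = sample(x -> -0.5 * x * x, 0.0, 10_000, 1.0, 42);
        double mean = java.util.Arrays.stream(draws).average().orElse(0);
        System.out.println("posterior mean ~ " + mean);  // close to 0
    }
}
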
@Article{Eizenberg:2017:BBL,
author = "Ariel Eizenberg and Yuanfeng Peng and Toma Pigli and
William Mansky and Joseph Devietti",
title = "{BARRACUDA}: binary-level analysis of runtime {RAces}
in {CUDA} programs",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "126--140",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062342",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "GPU programming models enable and encourage massively
parallel programming with over a million threads,
requiring extreme parallelism to achieve good
performance. Massive parallelism brings significant
correctness challenges by increasing the possibility
for bugs as the number of thread interleavings
balloons. Conventional dynamic safety analyses struggle
to run at this scale. We present BARRACUDA, a
concurrency bug detector for GPU programs written in
Nvidia's CUDA language. BARRACUDA handles a wider range
of parallelism constructs than previous work, including
branch operations, low-level atomics and memory fences,
which allows BARRACUDA to detect new classes of
concurrency bugs. BARRACUDA operates at the binary
level for increased compatibility with existing code,
leveraging a new binary instrumentation framework that
is extensible to other dynamic analyses. BARRACUDA
incorporates a number of novel optimizations that are
crucial for scaling concurrency bug detection to over a
million threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Rhodes:2017:BSC,
author = "Dustin Rhodes and Cormac Flanagan and Stephen N.
Freund",
title = "{BigFoot}: static check placement for dynamic race
detection",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "141--156",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062350",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Precise dynamic data race detectors provide strong
correctness guarantees but have high overheads because
they generally keep analysis state in a separate shadow
location for each heap memory location, and they check
(and potentially update) the corresponding shadow
location on each heap access. The BigFoot dynamic data
race detector uses a combination of static and dynamic
analysis techniques to coalesce checks and compress
shadow locations. With BigFoot, multiple accesses to an
object or array often induce a single coalesced check
that manipulates a single compressed shadow location,
resulting in a performance improvement over FastTrack
of 61\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Kini:2017:DRP,
author = "Dileep Kini and Umang Mathur and Mahesh Viswanathan",
title = "Dynamic race prediction in linear time",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "157--170",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062374",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Writing reliable concurrent software remains a huge
challenge for today's programmers. Programmers rarely
reason about their code by explicitly considering
                 different possible interleavings of its execution. We
consider the problem of detecting data races from
individual executions in a sound manner. The classical
approach to solving this problem has been to use
                 Lamport's happens-before (HB) relation. Until now, HB
                 has remained the only approach that runs in linear time.
                 Previous efforts to improve on HB, such as
                 causally-precedes (CP) and maximal causal models, fall
                 short because they cannot be implemented
                 efficiently and hence have to compromise on their race
detecting ability by limiting their techniques to
bounded sized fragments of the execution. We present a
new relation weak-causally-precedes (WCP) that is
provably better than CP in terms of being able to
detect more races, while still remaining sound.
Moreover, it admits a linear time algorithm which works
on the entire execution without having to fragment
it.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
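
Note (illustration for the entry above): the classical
happens-before baseline that WCP improves on can be sketched with
vector clocks. A minimal Python sketch with invented names
(HBDetector, vc_join, and the example thread/lock labels); it
implements the linear-time HB baseline discussed in the abstract,
not the paper's WCP relation.

    # Minimal happens-before (HB) race detection via vector clocks.
    # Illustrative baseline only, not the paper's WCP algorithm.

    def vc_join(a, b):
        return {t: max(a.get(t, 0), b.get(t, 0)) for t in set(a) | set(b)}

    def vc_leq(a, b):
        return all(v <= b.get(t, 0) for t, v in a.items())

    class HBDetector:
        def __init__(self):
            self.thread_vc = {}   # thread id -> vector clock
            self.lock_vc = {}     # lock id   -> clock at last release
            self.accesses = {}    # variable  -> [(is_write, thread, clock)]

        def _clock(self, t):
            return self.thread_vc.setdefault(t, {t: 1})

        def acquire(self, t, l):
            self.thread_vc[t] = vc_join(self._clock(t),
                                        self.lock_vc.get(l, {}))

        def release(self, t, l):
            vc = self._clock(t)
            self.lock_vc[l] = dict(vc)
            vc[t] = vc.get(t, 0) + 1      # start a new epoch for t

        def access(self, t, x, is_write):
            vc = dict(self._clock(t))
            for (w, u, prev) in self.accesses.setdefault(x, []):
                if (is_write or w) and u != t and not vc_leq(prev, vc):
                    print(f"race on {x}: thread {u} vs thread {t}")
            self.accesses[x].append((is_write, t, vc))

    d = HBDetector()
    d.access("T1", "x", True)
    d.access("T2", "x", True)   # unordered conflicting write: race reported
    d.access("T1", "y", True)
    d.release("T1", "L")        # T1 publishes via lock L...
    d.acquire("T2", "L")
    d.access("T2", "y", True)   # ...so this write is HB-ordered: no race
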
@Article{Billes:2017:SBB,
author = "Marina Billes and Anders M{\o}ller and Michael
Pradel",
title = "Systematic black-box analysis of collaborative web
applications",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "171--184",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062364",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Web applications, such as collaborative editors that
allow multiple clients to concurrently interact on a
shared resource, are difficult to implement correctly.
Existing techniques for analyzing concurrent software
do not scale to such complex systems or do not consider
multiple interacting clients. This paper presents
Simian, the first fully automated technique for
systematically analyzing multi-client web applications.
Naively exploring all possible interactions between a
set of clients of such applications is practically
infeasible. Simian obtains scalability for real-world
applications by using a two-phase black-box approach.
The application code remains unknown to the analysis
and is first explored systematically using a single
client to infer potential conflicts between client
events triggered in a specific context. The second
phase synthesizes multi-client interactions targeted at
triggering misbehavior that may result from the
potential conflicts, and reports an inconsistency if
the clients do not converge to a consistent state. We
evaluate the analysis on three widely used systems,
Google Docs, Firepad, and ownCloud Documents, where it
reports a variety of inconsistencies, such as incorrect
formatting and misplaced text fragments. Moreover, we
find that the two-phase approach runs 10x faster than
exhaustive exploration, making systematic
analysis practically applicable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Haas:2017:BWS,
author = "Andreas Haas and Andreas Rossberg and Derek L. Schuff
and Ben L. Titzer and Michael Holman and Dan Gohman and
Luke Wagner and Alon Zakai and J. F. Bastien",
title = "Bringing the web up to speed with {WebAssembly}",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "185--200",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062363",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The maturation of the Web platform has given rise to
sophisticated and demanding Web applications such as
interactive 3D visualization, audio and video software,
and games. With that, efficiency and security of code
on the Web have become more important than ever. Yet
JavaScript, as the only built-in language of the Web,
is not well-equipped to meet these requirements,
especially as a compilation target. Engineers from the
four major browser vendors have risen to the challenge
and collaboratively designed a portable low-level
bytecode called WebAssembly. It offers compact
representation, efficient validation and compilation,
and safe, low-to-no-overhead execution. Rather than
committing to a specific programming model, WebAssembly
is an abstraction over modern hardware, making it
language-, hardware-, and platform-independent, with
use cases beyond just the Web. WebAssembly has been
designed with a formal semantics from the start. We
describe the motivation, design and formal semantics of
WebAssembly and provide some preliminary experience
with implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Petrashko:2017:MCU,
author = "Dmitry Petrashko and Ondrej Lhot{\'a}k and Martin
Odersky",
title = "Miniphases: compilation using modular and efficient
tree transformations",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "201--216",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062346",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Production compilers commonly perform dozens of
transformations on an intermediate representation.
Running those transformations in separate passes harms
performance. One approach to recovering performance is
to combine transformations by hand in order to reduce
the number of passes. Such an approach harms
modularity, makes it hard to maintain and evolve a
compiler over the long term, and makes reasoning about
performance harder. This paper describes a methodology
that allows a compiler writer to define multiple
transformations separately, but fuse them into a single
traversal of the intermediate representation when the
compiler runs. This approach has been implemented in a
compiler for the Scala language. Our performance
evaluation indicates that this approach reduces the
running time of tree transformations by 35\% and shows
that this is due to improved cache friendliness. At the
same time, the approach improves total memory
consumption by reducing the object tenuring rate by
50\%. This approach enables compiler writers to write
transformations that are both modular and fast.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
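
Note (illustration for the entry above): the fusion idea, in which
each miniphase contributes a per-node rewrite and a single traversal
applies all phases at each node, can be sketched as follows. All
names (Node, const_fold, strength_reduce, run_fused) are invented;
this is not the Scala compiler's API.

    # Sketch of fusing per-node tree transformations into a single
    # traversal, in the spirit of miniphases. All names invented.

    class Node:
        def __init__(self, op, children=(), value=None):
            self.op, self.children, self.value = op, list(children), value

        def __repr__(self):
            if self.op == "const":
                return f"const({self.value})"
            return f"{self.op}({', '.join(map(repr, self.children))})"

    def const_fold(n):
        # One miniphase: fold an addition of two constants.
        if n.op == "add" and all(c.op == "const" for c in n.children):
            return Node("const", value=sum(c.value for c in n.children))
        return n

    def strength_reduce(n):
        # Another miniphase: x * 2 becomes x + x.
        if (n.op == "mul" and n.children[1].op == "const"
                and n.children[1].value == 2):
            return Node("add", [n.children[0], n.children[0]])
        return n

    def run_fused(phases, node):
        # One bottom-up traversal; each node flows through every phase
        # in sequence, instead of one whole-tree pass per phase.
        node.children = [run_fused(phases, c) for c in node.children]
        for phase in phases:
            node = phase(node)
        return node

    tree = Node("mul", [Node("add", [Node("const", value=1),
                                     Node("const", value=2)]),
                        Node("const", value=2)])
    print(run_fused([const_fold, strength_reduce], tree))
    # prints add(const(3), const(3))

The run also shows why grouping matters: const_fold never revisits
the add node that strength_reduce creates, so fused phases must be
ordered and grouped such that one flow through the list per node
suffices.
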
@Article{Canino:2017:PAE,
author = "Anthony Canino and Yu David Liu",
title = "Proactive and adaptive energy-aware programming with
mixed typechecking",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "217--232",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062356",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Application-level energy management is an important
dimension of energy optimization. In this paper, we
introduce ENT, a novel programming language for
enabling *proactive* and *adaptive* mode-based energy
management at the application level. The proactive
design allows programmers to apply their application
knowledge to energy management, by characterizing the
energy behavior of different program fragments with
modes. The adaptive design allows such characterization
to be delayed until run time, useful for capturing
dynamic program behavior dependent on program states,
configuration settings, external battery levels, or CPU
temperatures. The key insight is that both
proactiveness and adaptiveness can be unified under a
type system that combines static and dynamic typing.
ENT has
been implemented as an extension to Java, and
successfully ported to three energy-conscious
platforms: an Intel-based laptop, a Raspberry Pi, and
an Android phone. Evaluation shows ENT improves the
programmability, debuggability, and energy efficiency
of battery-aware and temperature-aware programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Kedia:2017:SFS,
author = "Piyus Kedia and Manuel Costa and Matthew Parkinson and
Kapil Vaswani and Dimitrios Vytiniotis and Aaron
Blankstein",
title = "Simple, fast, and safe manual memory management",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "233--247",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062376",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Safe programming languages are readily available, but
many applications continue to be written in unsafe
languages for the sake of efficiency. As a consequence, many
applications continue to have exploitable memory safety
bugs. Since garbage collection is a major source of
inefficiency in the implementation of safe languages,
replacing it with safe manual memory management would
be an important step towards solving this problem.
Previous approaches to safe manual memory management
use programming models based on regions, unique
pointers, borrowing of references, and ownership types.
We propose a much simpler programming model that does
not require any of these concepts. Starting from the
design of an imperative type safe language (like Java
or C\#), we just add a delete operator to free memory
explicitly and an exception which is thrown if the
program dereferences a pointer to freed memory. We
propose an efficient implementation of this programming
model that guarantees type safety. Experimental results
from our implementation based on the C\# native
compiler show that this design achieves up to 3x
reduction in peak working set and run time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
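
Note (illustration for the entry above): the programming model, an
ordinary type-safe language plus an explicit delete and a fail-stop
exception on dangling dereferences, can be modeled abstractly. All
names below (Heap, DanglingReferenceError, the version scheme) are
invented; the real system works inside a C#-like native compiler
and the virtual memory system, not a Python dict.

    # Sketch of the "delete + exception on dangling dereference"
    # programming model. Illustrative only.

    class DanglingReferenceError(Exception):
        pass

    class Heap:
        def __init__(self):
            self.cells = {}      # address -> (version, payload)
            self.next_addr = 0

        def new(self, payload):
            addr = self.next_addr
            self.next_addr += 1
            self.cells[addr] = (1, payload)
            return (addr, 1)     # a reference = (address, version)

        def deref(self, ref):
            addr, version = ref
            live = self.cells.get(addr)
            if live is None or live[0] != version:
                raise DanglingReferenceError(f"stale reference {ref}")
            return live[1]

        def delete(self, ref):
            self.deref(ref)      # deleting via a stale ref also fails
            # Addresses are never reused here; a reusing allocator
            # would bump the version on reallocation instead.
            del self.cells[ref[0]]

    h = Heap()
    r = h.new({"x": 42})
    print(h.deref(r)["x"])       # 42
    h.delete(r)
    try:
        h.deref(r)               # dangling: raises, never corrupts memory
    except DanglingReferenceError as e:
        print(e)
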
@Article{Kincaid:2017:CRA,
author = "Zachary Kincaid and Jason Breck and Ashkan Forouhi
Boroujeni and Thomas Reps",
title = "Compositional recurrence analysis revisited",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "248--262",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062373",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compositional recurrence analysis (CRA) is a
static-analysis method based on a combination of
symbolic analysis and abstract interpretation. This
paper addresses the problem of creating a
context-sensitive interprocedural version of CRA that
handles recursive procedures. The problem is
non-trivial because there is an ``impedance mismatch''
between CRA, which relies on analysis techniques based
on regular languages (i.e., Tarjan's path-expression
method), and the context-free-language underpinnings of
context-sensitive analysis. We show how to address this
impedance mismatch by augmenting the CRA abstract
domain with additional operations. We call the
resulting algorithm Interprocedural CRA (ICRA). Our
experiments with ICRA show that it has broad overall
strength compared with several state-of-the-art
software model checkers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
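
Note (illustration for the entry above): the core CRA step,
summarizing a loop by extracting and solving recurrences for its
variables, can be shown on the linear recurrence x := a*x + b, whose
closed form after k iterations is a^k*x_0 + b*(a^k - 1)/(a - 1)
(or x_0 + b*k when a = 1). A self-contained sketch, not the ICRA
implementation, which works over full transition formulas.

    from fractions import Fraction

    # Toy recurrence-summarization step for a loop body x := a*x + b.

    def closed_form(a, b, x0, k):
        a, b, x0 = Fraction(a), Fraction(b), Fraction(x0)
        if a == 1:                                 # x_k = x0 + b*k
            return x0 + b * k
        return a**k * x0 + b * (a**k - 1) / (a - 1)

    def run_loop(a, b, x0, k):                     # concrete execution
        x = Fraction(x0)
        for _ in range(k):
            x = a * x + b
        return x

    for (a, b, x0, k) in [(1, 3, 5, 10), (2, 1, 0, 8), (3, -2, 7, 5)]:
        assert closed_form(a, b, x0, k) == run_loop(a, b, x0, k)
    print("closed forms match concrete loop executions")
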
@Article{Thiessen:2017:CTP,
author = "Rei Thiessen and Ondrej Lhot{\'a}k",
title = "Context transformations for pointer analysis",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "263--277",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062359",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Points-to analysis for Java benefits greatly from
context sensitivity. CFL-reachability and k-limited
context strings are two approaches to obtaining context
sensitivity with different advantages: CFL-reachability
allows local reasoning about data-value flow and thus
is suitable for demand-driven analyses, whereas
k-limited analyses allow object sensitivity, which is a
superior calling-context abstraction for
object-oriented languages. We combine the advantages of
both approaches to obtain a context-sensitive analysis
that is as precise as k-limited context strings, but
is more efficient to compute. Our key insight is based
on a novel abstraction of contexts adapted from
CFL-reachability that represents a relation between two
calling contexts as a composition of transformations
over contexts. We formulate pointer analysis in an
algebraic structure of context transformations, which
is a set of functions over calling contexts closed
under function composition. We show that the context
representation of context-string-based analyses is an
explicit enumeration of all input and output values of
context transformations. CFL-reachability-based pointer
analysis is formulated to use call-strings as contexts,
but the context transformations concept can be applied
to any context abstraction used in k-limited analyses,
including object- and type-sensitive analysis. The
result is a more efficient algorithm for computing
context-sensitive results for a wide variety of context
configurations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
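
Note (illustration for the entry above): the paper's key
abstraction, representing the relation between calling contexts as
a composition of transformations rather than an enumeration of
context pairs, can be sketched over k-limited call strings. The
names (push, pop, compose) are invented; illustrative only.

    # Context transformations over k-limited call strings: each
    # transformation is a function on contexts, closed under
    # composition, with None marking infeasible (filtered) flows.

    K = 2  # context-string limit

    def push(site):
        # Entering a call at `site`: prepend it, truncate to k.
        return lambda ctx: (site,) + ctx[:K - 1]

    def pop(site):
        # Returning from a call made at `site`; other contexts are
        # infeasible.
        return lambda ctx: ctx[1:] if ctx and ctx[0] == site else None

    def compose(f, g):
        def h(ctx):
            mid = f(ctx)
            return None if mid is None else g(mid)
        return h

    # Flow main --call1--> f --call2--> g, then g returns to f:
    t = compose(compose(push("call1"), push("call2")), pop("call2"))
    print(t(()))                       # ('call1',): f's context
    print(pop("call9")(("call1",)))    # None: infeasible return
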
@Article{Tan:2017:EPP,
author = "Tian Tan and Yue Li and Jingling Xue",
title = "Efficient and precise points-to analysis: modeling the
heap by merging equivalent automata",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "278--291",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062360",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mainstream points-to analysis techniques for
object-oriented languages rely predominantly on the
allocation-site abstraction to model heap objects. We
present MAHJONG, a novel heap abstraction that is
specifically developed to address the needs of an
important class of type-dependent clients, such as call
graph construction, devirtualization and may-fail
casting. By merging equivalent automata representing
type-consistent objects that are created by the
allocation-site abstraction, MAHJONG enables an
allocation-site-based points-to analysis to run
significantly faster while achieving nearly the same
precision for type-dependent clients. MAHJONG is
conceptually simple, efficient, and drops easily into
any allocation-site-based points-to analysis. We
demonstrate its effectiveness by discussing some
insights into why it is a better alternative to the
allocation-site abstraction for type-dependent clients
and evaluating it extensively on 12 large real-world
Java programs with five context-sensitive points-to
analyses and three widely used type-dependent clients.
MAHJONG is expected to provide significant benefits for
many program analyses where call graphs are required.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Santhiar:2017:SDD,
author = "Anirudh Santhiar and Aditya Kanade",
title = "Static deadlock detection for asynchronous {C\#}
programs",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "292--305",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062361",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Asynchronous programming is a standard approach for
designing responsive applications. Modern languages
such as C\# provide async/await primitives for the
disciplined use of asynchrony. In spite of this,
programs can deadlock because of incorrect use of
blocking operations along with non-blocking
(asynchronous) operations. While developers are aware
of this problem, there is no automated technique to
detect deadlocks in asynchronous programs. We present
a novel representation of the control flow and
scheduling of asynchronous programs, called the
continuation scheduling graph, and formulate necessary
conditions for a deadlock to occur in a program. We
design static analyses to
construct continuation scheduling graphs of
asynchronous C\# programs and to identify deadlocks in
them. We have implemented the static analyses in a tool
called DeadWait. Using DeadWait, we found 43 previously
unknown deadlocks in 11 asynchronous C\# libraries. We
reported the deadlocks to the library developers. They
have confirmed and fixed 40 of them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Fu:2017:AHC,
author = "Zhoulai Fu and Zhendong Su",
title = "Achieving high coverage for floating-point code via
unconstrained programming",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "306--319",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062383",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Achieving high code coverage is essential in testing,
which gives us confidence in code quality. Testing
floating-point code usually requires painstaking
efforts in handling floating-point constraints, e.g.,
in symbolic execution. This paper turns the challenge
of testing floating-point code into the opportunity of
applying unconstrained programming --- the mathematical
solution for calculating function minimum points over
the entire search space. Our core insight is to derive
a representing function from the floating-point
program, any of whose minimum points is a test input
guaranteed to exercise a new branch of the tested
program. This guarantee allows us to achieve high
coverage of the floating-point program by repeatedly
minimizing the representing function. We have realized
this approach in a tool called CoverMe and conducted an
extensive evaluation of it on Sun's C math library. Our
evaluation results show that CoverMe achieves, on
average, 90.8\% branch coverage in 6.9 seconds,
drastically outperforming our compared tools: (1)
Random testing, (2) AFL, a highly optimized, robust
fuzzer released by Google, and (3) Austin, a
state-of-the-art coverage-based testing tool designed
to support floating-point code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
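
Note (illustration for the entry above): the "representing function"
idea, reducing branch coverage to unconstrained minimization of a
function that is zero exactly at inputs driving execution down a
target branch, can be sketched without the paper's tooling. The toy
program, penalty shape, and random-restart minimizer below are all
invented stand-ins; CoverMe instruments real floating-point code and
uses a proper unconstrained solver, not random search.

    import random

    def program(x):                    # toy program under test
        if x * x - 4.0 < 0.0:          # branch "b0" (taken) / "b1" (not)
            return "b0"
        return "b1"

    def representing(x, want):
        # Zero exactly when the wanted branch would be exercised,
        # strictly positive otherwise.
        d = x * x - 4.0
        return max(d + 1e-9, 0.0) if want == "b0" else max(-d, 0.0)

    def minimize(f, tries=20000):
        # Stand-in for an unconstrained solver: random restarts.
        best_x, best_v = None, float("inf")
        for _ in range(tries):
            x = random.uniform(-1000.0, 1000.0)
            v = f(x)
            if v < best_v:
                best_x, best_v = x, v
            if best_v == 0.0:
                break
        return best_x

    covered = {program(minimize(lambda x, w=w: representing(x, w)))
               for w in ("b0", "b1")}
    print("covered branches:", sorted(covered))    # ['b0', 'b1']
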
@Article{Chamith:2017:IPL,
author = "Buddhika Chamith and Bo Joel Svensson and Luke
Dalessandro and Ryan R. Newton",
title = "Instruction punning: lightweight instrumentation for
x86-64",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "320--332",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062344",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Existing techniques for injecting probes into running
applications are limited; they fail either to support
probing arbitrary locations or to support scalable,
rapid toggling of probes. We introduce a new technique
on x86-64, called instruction punning, which allows
scalable probes at any instruction. The key idea is
that when we inject a jump instruction, the relative
address of the jump serves simultaneously as data and
as an instruction sequence. We show that this approach
achieves probe invocation overheads of only a few dozen
cycles, and probe activation/deactivation costs that
are cheaper than a system call, even when all threads
in the system are both invoking probes and toggling
them.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{DAntras:2017:LOD,
author = "Amanieu D'Antras and Cosmin Gorgovan and Jim Garside
and Mikel Luj{\'a}n",
title = "Low overhead dynamic binary translation on {ARM}",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "333--346",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062371",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The ARMv8 architecture introduced AArch64, a 64-bit
execution mode with a new instruction set, while
retaining binary compatibility with previous versions
of the ARM architecture through AArch32, a 32-bit
execution mode. Most hardware implementations of ARMv8
processors support both AArch32 and AArch64, which
comes at a cost in hardware complexity. We present
MAMBO-X64, a dynamic binary translator for Linux which
executes 32-bit ARM binaries using only the AArch64
instruction set. We have evaluated the performance of
MAMBO-X64 on three existing ARMv8 processors which
support both AArch32 and AArch64 instruction sets. The
performance was measured by comparing the running time
of 32-bit benchmarks running under MAMBO-X64 with the
same benchmark running natively. On SPEC CPU2006, we
achieve a geometric mean overhead of less than 7.5\% on
in-order Cortex-A53 processors and a performance
improvement of 1\% on out-of-order X-Gene 1 processors.
MAMBO-X64 achieves such low overhead through novel
optimizations that map AArch32 floating-point registers
to AArch64 registers dynamically, handle overflowing
address calculations efficiently, generate traces that
harness hardware return address prediction, and handle
operating system signals accurately.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Zhang:2017:SPE,
author = "Qirun Zhang and Chengnian Sun and Zhendong Su",
title = "Skeletal program enumeration for rigorous compiler
testing",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "347--361",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062379",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A program can be viewed as a syntactic structure P
(syntactic skeleton) parameterized by a collection of
identifiers V (variable names). This paper introduces
the skeletal program enumeration (SPE) problem: Given a
syntactic skeleton P and a set of variables V ,
enumerate a set of programs P exhibiting all possible
variable usage patterns within P. It proposes an
effective realization of SPE for systematic, rigorous
compiler testing by leveraging three important
observations: (1) Programs with different variable
usage patterns exhibit diverse control- and
data-dependence, and help exploit different compiler
optimizations; (2) most real compiler bugs were
revealed by small tests (i.e., small-sized P) --- this
``small-scope'' observation opens up SPE for practical
compiler validation; and (3) SPE is exhaustive w.r.t. a
given syntactic skeleton and variable set, offering a
level of guarantee absent from all existing compiler
testing techniques. The key challenge of SPE is how to
eliminate the enormous number of programs that are
equivalent w.r.t. \alpha-conversion. Our main technical
contribution is a novel algorithm for computing the
canonical (and smallest) set of all
non-\alpha-equivalent programs. To demonstrate its practical
utility, we have applied the SPE technique to test
C/C++ compilers using syntactic skeletons derived from
their own regression test-suites. Our evaluation
results are extremely encouraging. In less than six
months, our approach has led to 217 confirmed GCC/Clang
bug reports, 119 of which have already been fixed, and
the majority are long latent despite extensive prior
testing efforts. Our SPE algorithm also provides a
six-orders-of-magnitude reduction in the number of
programs to enumerate. Moreover, in three
weeks, our technique has found 29 CompCert crashing
bugs and 42 bugs in two Scala optimizing compilers.
These results demonstrate our SPE technique's
generality and further illustrate its effectiveness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
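
Note (illustration for the entry above): the combinatorial core,
enumerating variable-usage patterns up to \alpha-equivalence, is
enumeration of canonical variable assignments, which
restricted-growth strings produce directly. A small sketch assuming
a skeleton with three variable holes; all names are invented and
this is far simpler than the paper's algorithm.

    # Enumerate variable-usage patterns for n holes, one program per
    # alpha-equivalence class, via restricted-growth strings
    # (canonical set partitions). Illustrative only.

    def rgs(n):
        # Yield strings s with s[0] = 0 and s[i] <= max(s[:i]) + 1.
        def go(prefix, mx):
            if len(prefix) == n:
                yield tuple(prefix)
                return
            for v in range(mx + 2):
                yield from go(prefix + [v], max(mx, v))
        yield from go([0], 0)

    def instantiate(skeleton, pattern):
        return skeleton.format(*[f"v{i}" for i in pattern])

    skeleton = "{0} = {1} + {2};"      # three variable holes
    for p in rgs(3):
        print(instantiate(skeleton, p))
    # 5 programs (the Bell number B(3)): "v0 = v0 + v0;",
    # "v0 = v0 + v1;", "v0 = v1 + v0;", "v0 = v1 + v1;",
    # "v0 = v1 + v2;"
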
@Article{Antonopoulos:2017:DIS,
author = "Timos Antonopoulos and Paul Gazzillo and Michael Hicks
and Eric Koskinen and Tachio Terauchi and Shiyi Wei",
title = "Decomposition instead of self-composition for proving
the absence of timing channels",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "362--375",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062378",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a novel approach to proving the absence of
timing channels. The idea is to partition the program's
execution traces in such a way that each partition
component is checked for timing attack resilience by a
time complexity analysis and that per-component
resilience implies the resilience of the whole program.
We construct a partition by splitting the program
traces at secret-independent branches. This ensures
that any pair of traces with the same public input has
a component containing both traces. Crucially, the
per-component checks can be normal safety properties
expressed in terms of a single execution. Our approach
is thus in contrast to prior approaches, such as
self-composition, that aim to reason about multiple
$(k \geq 2)$ executions at once. We formalize the
above as an approach called quotient partitioning,
generalized to any k-safety property, and prove it to
be sound. A key feature of our approach is a
demand-driven partitioning strategy that uses a
regex-like notion called trails to identify sets of
execution traces, particularly those influenced by
tainted (or secret) data. We have applied our technique
in a prototype implementation tool called Blazer, based
on WALA, PPL, and the brics automaton library. We have
proved timing-channel freedom of (or synthesized an
attack specification for) 24 programs written in Java
bytecode, including 6 classic examples from the
literature and 6 examples extracted from the DARPA STAC
challenge problems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Hu:2017:API,
author = "Qinheping Hu and Loris D'Antoni",
title = "Automatic program inversion using symbolic
transducers",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "376--389",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062345",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a fully automated technique for inverting
functional programs that operate over lists, such as
string encoders and decoders. We consider programs
that can be modeled using symbolic extended finite
transducers, an expressive model that can describe
complex list-manipulating programs while retaining
several decidable properties. Concretely, given a
program P expressed as a symbolic extended finite
transducer, we propose techniques for: (1) checking
whether P is injective and, if that is the case, (2)
building a symbolic extended finite transducer
P$^{-1}$ describing its inverse. We first show that it
is undecidable to check whether such a transducer is
injective, and we propose an algorithm for checking
injectivity for a restricted but practical class of
transducers. We then propose an inversion algorithm
based on the following idea: if a transducer is
injective, inverting it amounts to inverting all its
individual transitions. We leverage recent advances in
program synthesis and show that the transition
inversion problem can be expressed as an instance of
the syntax-guided synthesis framework. Finally, we
implement the proposed techniques in a tool and show
that it can invert 13 out of 14 real, complex string
encoders and decoders, producing inverse programs that
are substantially identical to manually written ones.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
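
Note (illustration for the entry above): the per-transition
inversion idea can be shown on a plain (non-symbolic) finite-state
transducer, side-stepping the symbolic alphabets and syntax-guided
synthesis used in the actual technique. The toy encoder below,
which escapes '%' as "%%", is invented; its output labels form a
prefix code, so it is injective and inversion is transition-wise.

    # If a transducer is injective, swapping input and output labels
    # on every transition yields its inverse. Sketch only.

    encoder = {                 # (state, input) -> (output, state)
        ("q", "%"): ("%%", "q"),
        ("q", "a"): ("a", "q"),
        ("q", "b"): ("b", "q"),
    }

    def run(trans, start, s):
        state, out, i = start, [], 0
        while i < len(s):
            for (st, inp), (o, nxt) in trans.items():
                if st == state and s.startswith(inp, i):
                    out.append(o)
                    state, i = nxt, i + len(inp)
                    break
            else:
                raise ValueError(f"stuck at position {i}")
        return "".join(out)

    # Invert every transition: outputs become inputs and vice versa.
    decoder = {(st, o): (inp, nxt)
               for (st, inp), (o, nxt) in encoder.items()}

    msg = "a%b%%a"
    enc = run(encoder, "q", msg)
    print(enc)                        # a%%b%%%%a
    print(run(decoder, "q", enc))     # a%b%%a  (round-trips)
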
@Article{Ohmann:2017:CFR,
author = "Peter Ohmann and Alexander Brooks and Loris D'Antoni
and Ben Liblit",
title = "Control-flow recovery from partial failure reports",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "390--405",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062368",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Debugging is difficult. When software fails in
production, debugging is even harder, as failure
reports usually provide only an incomplete picture of
the failing execution. We present a system that answers
control-flow queries posed by developers as formal
languages, indicating whether the query expresses
control flow that is possible or impossible for a given
failure report. We consider three separate approaches
that trade off precision, expressiveness for failure
constraints, and scalability. We also introduce a new
subclass of regular languages, the unreliable trace
languages, which are particularly suited to answering
control-flow queries in polynomial time. Our system
answers queries remarkably efficiently when we encode
failure constraints and user queries entirely as
unreliable trace languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Doychev:2017:RAS,
author = "Goran Doychev and Boris K{\"o}pf",
title = "Rigorous analysis of software countermeasures against
cache attacks",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "406--421",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062388",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "CPU caches introduce variations into the execution
time of programs that can be exploited by adversaries
to recover private information about users or
cryptographic keys. Establishing the security of
countermeasures against this threat often requires
intricate reasoning about the interactions of program
code, memory layout, and hardware architecture and has
so far only been done for restricted cases. In this
paper we devise novel techniques that provide support
for bit-level and arithmetic reasoning about memory
accesses in the presence of dynamic memory allocation.
These techniques enable us to perform the first
rigorous analysis of widely deployed software
countermeasures against cache attacks on modular
exponentiation, based on executable code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Feng:2017:CBSb,
author = "Yu Feng and Ruben Martins and Jacob {Van Geffen} and
Isil Dillig and Swarat Chaudhuri",
title = "Component-based synthesis of table consolidation and
transformation tasks from examples",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "422--436",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062351",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a novel component-based synthesis
algorithm that marries the power of type-directed
search with lightweight SMT-based deduction and partial
evaluation. Given a set of components together with
their over-approximate first-order specifications, our
method first generates a program sketch over a subset
of the components and checks its feasibility using an
SMT solver. Since a program sketch typically represents
many concrete programs, the use of SMT-based deduction
greatly increases the scalability of the algorithm.
Once a feasible program sketch is found, our algorithm
completes the sketch in a bottom-up fashion, using
partial evaluation to further increase the power of
deduction for rejecting partially-filled program
sketches. We apply the proposed synthesis methodology
to automate a large class of data-preparation tasks
that commonly arise in data science. We have evaluated
our synthesis algorithm on dozens of data wrangling and
consolidation tasks obtained from on-line forums, and
we show that our approach can automatically solve a
large class of problems encountered by R users.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Beckett:2017:NCS,
author = "Ryan Beckett and Ratul Mahajan and Todd Millstein and
Jitendra Padhye and David Walker",
title = "Network configuration synthesis with abstract
topologies",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "437--451",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062367",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We develop Propane/AT, a system to synthesize
provably-correct BGP (border gateway protocol)
configurations for large, evolving networks from
high-level specifications of topology, routing policy,
and fault-tolerance requirements. Propane/AT is based
on new abstractions for capturing parameterized network
topologies and their evolution, and algorithms to
analyze the impact of topology and routing policy on
fault tolerance. Our algorithms operate entirely on
abstract topologies. We prove that the properties
established by our analyses hold for every concrete
instantiation of the given abstract topology.
Propane/AT also guarantees that only incremental
changes to existing device configurations are required
when the network evolves to add or remove devices and
links. Our experiments with real-world topologies and
policies show that our abstractions and algorithms are
effective, and that, for large networks, Propane/AT
synthesizes configurations two orders of magnitude
faster than systems that operate on concrete
topologies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Wang:2017:SHE,
author = "Chenglong Wang and Alvin Cheung and Rastislav Bodik",
title = "Synthesizing highly expressive {SQL} queries from
input-output examples",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "452--466",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062365",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "SQL is the de facto language for manipulating
relational data. Though powerful, SQL is difficult for
many users to write because of its highly expressive
constructs. While using the programming-by-example
paradigm to help users write SQL queries is an
attractive proposition, as evidenced by online help
forums such as Stack Overflow, developing techniques
for synthesizing SQL queries from given input-output
(I/O) examples has been difficult, due to the large
space of SQL queries as a result of its rich set of
operators. In this paper, we present a new scalable and
efficient algorithm for synthesizing SQL queries based
on I/O examples. The key innovation of our algorithm is
the development of a language for abstract queries, i.e.,
queries with uninstantiated operators, that can be used
to express a large space of SQL queries efficiently.
Using abstract queries to represent the search space
nicely decomposes the synthesis problem into two tasks:
(1) searching for abstract queries that can potentially
satisfy the given I/O examples, and (2) instantiating
the found abstract queries and ranking the results. We
have implemented this algorithm in a new tool called
Scythe and evaluated it using 193 benchmarks collected
from Stack Overflow. Our evaluation shows that Scythe
can efficiently solve 74\% of the benchmarks, most in
just a few seconds, and the queries range from simple
ones involving a single selection to complex queries
with 6 nested subqueries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
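
Note (illustration for the entry above): the two-stage search,
first choosing an abstract query with uninstantiated operators and
then instantiating its holes against the I/O example, can be
sketched on a tiny table. Everything below (the table, the
single-filter hole grammar, the helper names) is invented and far
simpler than the actual tool.

    from itertools import product

    # Stage 1 picks an abstract shape with holes
    # (SELECT * FROM t WHERE <col> <op> <const>); stage 2
    # instantiates the holes against the I/O example.

    table = [{"name": "a", "score": 3}, {"name": "b", "score": 7},
             {"name": "c", "score": 9}]
    expected = [{"name": "b", "score": 7}, {"name": "c", "score": 9}]

    OPS = {">": lambda x, y: x > y, "<": lambda x, y: x < y,
           "=": lambda x, y: x == y}

    def feasible(column):
        # Stage-1 pruning (very simplified): only numeric columns
        # can instantiate a comparison filter.
        return all(isinstance(r[column], (int, float)) for r in table)

    def instantiate(column):
        consts = {r[column] for r in table}
        for op, const in product(OPS, consts):
            result = [r for r in table if OPS[op](r[column], const)]
            if result == expected:
                yield f"SELECT * FROM t WHERE {column} {op} {const}"

    for col in table[0]:
        if feasible(col):
            for query in instantiate(col):
                print(query)   # e.g. SELECT * FROM t WHERE score > 3
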
@Article{Bornholt:2017:SMM,
author = "James Bornholt and Emina Torlak",
title = "Synthesizing memory models from framework sketches and
Litmus tests",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "467--481",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062353",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A memory consistency model specifies which writes to
shared memory a given read may see. Ambiguities or
errors in these specifications can lead to bugs in both
compilers and applications. Yet architectures usually
define their memory models with prose and litmus
tests: small concurrent programs that demonstrate
allowed and forbidden outcomes. Recent work has
formalized the memory models of common architectures
through substantial manual effort, but as new
architectures emerge, there is a growing need for tools
to aid these efforts. This paper presents MemSynth, a
synthesis-aided system for reasoning about axiomatic
specifications of memory models. MemSynth takes as
input a set of litmus tests and a framework sketch that
defines a class of memory models. The sketch comprises
a set of axioms with missing expressions (or holes).
Given these inputs, MemSynth synthesizes a completion
of the axioms (i.e., a memory model) that gives the
desired outcome on all tests. The MemSynth engine
employs a novel embedding of bounded relational logic
in a solver-aided programming language, which enables
it to tackle complex synthesis queries intractable to
existing relational solvers. This design also enables
it to solve new kinds of queries, such as checking if a
set of litmus tests unambiguously defines a memory
model within a framework sketch. We show that MemSynth
can synthesize specifications for x86 in under two
seconds, and for PowerPC in 12 seconds from 768 litmus
tests. Our ambiguity check identifies missing tests
from both the Intel x86 documentation and the
validation suite of a previous PowerPC formalization.
We also used MemSynth to reproduce, debug, and
automatically repair a paper on comparing memory models
in just two days.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
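
Note (illustration for the entry above): the objects MemSynth
reasons about, litmus tests and memory models as predicates over
candidate executions, can be made concrete on the classic
store-buffering test. The sketch models sequential consistency by
enumerating interleavings and confirms that the r1 = r2 = 0 outcome
is forbidden (a weaker model such as x86-TSO would allow it).
Illustrative only.

    from itertools import permutations  # unused; kept minimal below

    # The store-buffering (SB) litmus test as data.
    thread0 = [("store", "x", 1), ("load", "y", "r1")]
    thread1 = [("store", "y", 1), ("load", "x", "r2")]

    def interleavings(a, b):
        if not a:
            yield tuple(b); return
        if not b:
            yield tuple(a); return
        for rest in interleavings(a[1:], b):
            yield (a[0],) + rest
        for rest in interleavings(a, b[1:]):
            yield (b[0],) + rest

    outcomes = set()
    for schedule in interleavings(thread0, thread1):
        mem, regs = {"x": 0, "y": 0}, {}
        for (op, loc, arg) in schedule:
            if op == "store":
                mem[loc] = arg
            else:
                regs[arg] = mem[loc]
        outcomes.add((regs["r1"], regs["r2"]))

    print(sorted(outcomes))        # (0, 0) is absent under SC
    assert (0, 0) not in outcomes
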
@Article{Maurer:2017:CC,
author = "Luke Maurer and Paul Downen and Zena M. Ariola and
Simon Peyton Jones",
title = "Compiling without continuations",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "482--494",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062380",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many fields of study in compilers give rise to the
concept of a join point: a place where different
execution paths come together. Join points are often
treated as functions or continuations, but we believe
it is time to study them in their own right. We show
that adding join points to a direct-style functional
intermediate language is a simple but powerful change
that allows new optimizations to be performed,
including a significant improvement to list fusion.
Finally, we report on recent work on adding join points
to the intermediate language of the Glasgow Haskell
Compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
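
Note (illustration for the entry above): the join-point idea can be
conveyed by analogy, with Python functions standing in for IR join
points; GHC's join points live in a typed functional intermediate
language, so this is only a shape-level sketch.

    # Compiling big_context( e1 if b else e2 ) by pushing the
    # context into the branches duplicates it:

    def duplicated(b, x):
        if b:
            return (x + 1) * 2 + 7    # context copied into arm 1
        else:
            return (x - 1) * 2 + 7    # ...and again into arm 2

    # Binding the context once as a "join point" j keeps one copy;
    # both arms finish by jumping (tail-calling) to j:

    def with_join_point(b, x):
        def j(v):                     # the join point
            return v * 2 + 7
        if b:
            return j(x + 1)
        else:
            return j(x - 1)

    assert duplicated(True, 5) == with_join_point(True, 5) == 19
    assert duplicated(False, 5) == with_join_point(False, 5) == 15
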
@Article{Patterson:2017:FRM,
author = "Daniel Patterson and Jamie Perconti and Christos
Dimoulas and Amal Ahmed",
title = "{FunTAL}: reasonably mixing a functional language with
assembly",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "495--509",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062347",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present FunTAL, the first multi-language system to
formalize safe interoperability between a high-level
functional language and low-level assembly code while
supporting compositional reasoning about the mix. A
central challenge in developing such a multi-language
is bridging the gap between assembly, which is staged
into jumps to continuations, and high-level code, where
subterms return a result. We present a compositional
stack-based typed assembly language that supports
components, comprised of one or more basic blocks, that
may be embedded in high-level contexts. We also present
a logical relation for FunTAL that supports reasoning
about equivalence of high-level components and their
assembly replacements, mixed-language programs with
callbacks between languages, and assembly components
comprised of different numbers of basic blocks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Chu:2017:HPQ,
author = "Shumo Chu and Konstantin Weitz and Alvin Cheung and
Dan Suciu",
title = "{HoTTSQL}: proving query rewrites with univalent {SQL}
semantics",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "510--524",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062348",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Every database system contains a query optimizer that
performs query rewrites. Unfortunately, developing
query optimizers remains a highly challenging task.
Part of the challenge comes from the intricacies and
rich features of query languages, which make reasoning
about rewrite rules difficult. In this paper, we
propose a machine-checkable denotational semantics for
SQL, the de facto language for relational databases, for
rigorously validating rewrite rules. Unlike previously
proposed semantics that are either non-mechanized or
only cover a small amount of SQL language features, our
semantics covers all major features of SQL, including
bags, correlated subqueries, aggregation, and indexes.
Our mechanized semantics, called HoTTSQL, is based on
K-Relations and homotopy type theory, where we denote
relations as mathematical functions from tuples to
univalent types. We have implemented HoTTSQL in Coq in
fewer than 300 lines of code, and we have proved a
wide range of SQL rewrite rules, including
those from database research literature (e.g., magic
set rewrites) and real-world query optimizers (e.g.,
subquery elimination). Several of these rewrite rules
have never been previously proven correct. In addition,
while query equivalence is generally undecidable, we
have implemented an automated decision procedure using
HoTTSQL for conjunctive queries: a well-studied
decidable fragment of SQL that encompasses many
real-world queries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
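
Note (illustration for the entry above): the denotation in the
abstract, a relation as a function from tuples to multiplicities
(a K-relation with bag semantics), is easy to model, and a rewrite
rule becomes an equality of such denotations. The sketch below
merely tests two classic rules on one sample instance, whereas
HoTTSQL proves them for all instances in Coq.

    from collections import Counter

    # A relation as tuple -> multiplicity (bag semantics).

    def select(pred, rel):
        return Counter({t: n for t, n in rel.items() if pred(t)})

    def union_all(r, s):
        return r + s                 # bag union adds multiplicities

    R = Counter({(1, "a"): 2, (2, "b"): 1, (3, "a"): 1})
    p = lambda t: t[1] == "a"
    q = lambda t: t[0] >= 2

    # Selection splitting: sigma_{p and q}(R) = sigma_p(sigma_q(R)).
    assert select(lambda t: p(t) and q(t), R) == select(p, select(q, R))

    # Selection distributes over bag union.
    S = Counter({(3, "a"): 4, (5, "c"): 1})
    assert (select(p, union_all(R, S))
            == union_all(select(p, R), select(p, S)))
    print("both rewrite rules hold on the sample K-relations")
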
@Article{Eisenberg:2017:LP,
author = "Richard A. Eisenberg and Simon Peyton Jones",
title = "Levity polymorphism",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "525--539",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062357",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parametric polymorphism is one of the linchpins of
modern typed programming, but it comes with a real
performance penalty. We describe this penalty; offer a
principled way to reason about it (kinds as calling
conventions); and propose levity polymorphism. This new
form of polymorphism allows abstractions over calling
conventions; we detail and verify restrictions that are
necessary in order to compile levity-polymorphic
functions. Levity polymorphism has created new
opportunities in Haskell, including the ability to
generalize nearly half of the type classes in GHC's
standard library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Farzan:2017:SDC,
author = "Azadeh Farzan and Victor Nicolet",
title = "Synthesis of divide and conquer parallelism for
loops",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "540--555",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062355",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Divide-and-conquer is a common parallel programming
skeleton supported by many cross-platform multithreaded
libraries, and most commonly used by programmers for
parallelization. The challenges of producing (manually
or automatically) a correct divide-and-conquer parallel
program from a given sequential code are two-fold: (1)
assuming that a good solution exists where individual
worker threads execute code identical to the
sequential one, the programmer has to provide the extra
code for dividing the tasks and combining the partial
results (i.e. joins), and (2) the sequential code may
not be suitable for divide-and-conquer parallelization
as is, and may need to be modified to become a part of
a good solution. We address both challenges in this
paper. We present an automated synthesis technique to
synthesize correct joins and an algorithm for modifying
the sequential code to make it suitable for
parallelization when necessary. This paper focuses on
a class of loops that traverse a read-only collection and
compute a scalar function over that collection. We
present theoretical results for when the necessary
modifications to sequential code are possible,
theoretical guarantees for the algorithmic solutions
presented here, and experimental evaluation of the
approach's success in practice and the quality of the
produced parallel programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
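
Note (illustration for the entry above): the need for synthesized
joins shows up even on maximum prefix sum, which is not mergeable
from the two partial answers alone; the join must also track each
segment's total sum, exactly the kind of auxiliary computation a
synthesizer has to discover. Function names invented; sketch only.

    # mps(A ++ B) = max(mps(A), sum(A) + mps(B)): the join needs
    # the auxiliary segment sum, not just the two mps values.

    def mps_serial(xs):
        best = s = 0
        for x in xs:
            s += x
            best = max(best, s)
        return best

    def mps_dc(xs):
        if len(xs) <= 1:
            s = sum(xs)
            return (max(0, s), s)        # (mps, sum) for a segment
        mid = len(xs) // 2
        (mps_l, sum_l) = mps_dc(xs[:mid])  # these two calls could
        (mps_r, sum_r) = mps_dc(xs[mid:])  # run in parallel workers
        # The synthesized join: right prefixes are offset by sum_l.
        return (max(mps_l, sum_l + mps_r), sum_l + sum_r)

    data = [2, -1, 3, -4, 1, 2]
    assert mps_dc(data)[0] == mps_serial(data) == 4
    print("join with auxiliary sum reproduces the serial result")
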
@Article{Henriksen:2017:FPF,
author = "Troels Henriksen and Niels G. W. Serup and Martin
Elsman and Fritz Henglein and Cosmin E. Oancea",
title = "{Futhark}: purely functional {GPU-programming} with
nested parallelism and in-place array updates",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "556--571",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062354",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Futhark is a purely functional data-parallel array
language that offers a machine-neutral programming
model and an optimising compiler that generates OpenCL
code for GPUs. This paper presents the design and
implementation of three key features of Futhark that
seek a suitable middle ground with imperative
approaches. First, in order to express efficient code
inside the parallel constructs, we introduce a simple
type system for in-place updates that ensures
referential transparency and supports equational
reasoning. Second, we furnish Futhark with parallel
operators capable of expressing efficient
strength-reduced code, along with their fusion rules.
Third, we present a flattening transformation aimed at
enhancing the degree of parallelism that (i) builds on
loop interchange and distribution but uses higher-order
reasoning rather than array-dependence analysis, and
(ii) still allows further locality-of-reference
optimisations. Finally, an evaluation on 16 benchmarks
demonstrates the impact of the language and compiler
features and shows application-level performance
competitive with hand-written GPU code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Fedyukovich:2017:GSS,
author = "Grigory Fedyukovich and Maaz Bin Safeer Ahmad and
Rastislav Bodik",
title = "Gradual synthesis for static parallelization of
single-pass array-processing programs",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "572--585",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062382",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parallelizing software improves its effectiveness
and productivity. To guarantee correctness, the
parallel and serial versions of the same code must be
formally verified to be equivalent. We present a novel
approach, called GRASSP, that automatically synthesizes
parallel single-pass array-processing programs by
treating the given serial versions as specifications.
Given arbitrary segmentation of the input array, GRASSP
synthesizes a code to determine a new segmentation of
the array that allows computing partial results for
each segment and merging them. In contrast to other
parallelizers, GRASSP gradually considers several
parallelization scenarios and certifies the results
using constrained Horn solving. For several classes of
programs, we show that such parallelization can be
performed efficiently. The C++ translations of the
GRASSP solutions sped up performance by up to 5X relative
to serial code on an 8-thread machine and Hadoop
translations by up to 10X on a 10-node Amazon EMR
cluster.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Bourke:2017:FVC,
author = "Timothy Bourke and L{\'e}lio Brun and
Pierre-{\'E}variste Dagand and Xavier Leroy and Marc
Pouzet and Lionel Rieg",
title = "A formally verified compiler for {Lustre}",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "586--601",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062358",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The correct compilation of block diagram languages
like Lustre, Scade, and a discrete subset of Simulink
is important since they are used to program critical
embedded control software. We describe the
specification and verification in an Interactive
Theorem Prover of a compilation chain that treats the
key aspects of Lustre: sampling, nodes, and delays.
Building on CompCert, we show that repeated execution
of the generated assembly code faithfully implements
the dataflow semantics of source programs. We resolve
two key technical challenges. The first is the change
from a synchronous dataflow semantics, where programs
manipulate streams of values, to an imperative one,
where computations manipulate memory sequentially. The
second is the verified compilation of an imperative
language with encapsulated state to C code where the
state is realized by nested records. We also treat a
standard control optimization that eliminates
unnecessary conditional statements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
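
Note (illustration for the entry above): the first challenge
mentioned in the abstract, moving from stream semantics to
sequential memory, amounts to compiling each node to a step
function over explicit, encapsulated state. A toy sketch of the
shape of such generated code for an integrator node with a unit
delay; the node syntax in the comment and all names are invented,
not the verified compiler's output.

    # Toy Lustre-like node:
    #     node sum(x: int) returns (s: int);
    #     let s = x + (0 fby s); tel
    # "0 fby s" is s delayed by one tick, initialized to 0. The
    # compiled form holds the delayed value in explicit state.

    class SumNode:
        def __init__(self):
            self.pre_s = 0            # memory for the delay operator

        def step(self, x):
            # One synchronous tick: compute the output, then update
            # the memory for the next tick.
            s = x + self.pre_s
            self.pre_s = s
            return s

    # Repeated execution of step() implements the stream semantics:
    node = SumNode()
    print([node.step(x) for x in [1, 2, 3, 4]])   # [1, 3, 6, 10]
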
@Article{Abdulla:2017:FCF,
author = "Parosh Aziz Abdulla and Mohamed Faouzi Atig and
Yu-Fang Chen and Bui Phi Diep and Luk{\'a}s Hol{\'\i}k
and Ahmed Rezine and Philipp R{\"u}mmer",
title = "Flatten and conquer: a framework for efficient
analysis of string constraints",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "602--617",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062384",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a uniform and efficient framework for
checking the satisfiability of a large class of string
constraints. The framework is based on the observation
that both satisfiability and unsatisfiability of common
constraints can be demonstrated through witnesses with
simple patterns. These patterns are captured using flat
automata each of which consists of a sequence of simple
loops. We build a Counter-Example Guided Abstraction
Refinement (CEGAR) framework which contains both an
under- and an over-approximation module. The flow of
information between the modules increases precision
automatically. We have implemented
the framework as a tool and performed extensive
experimentation that demonstrates both the generality
and efficiency of our method.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Lahav:2017:RSC,
author = "Ori Lahav and Viktor Vafeiadis and Jeehoon Kang and
Chung-Kil Hur and Derek Dreyer",
title = "Repairing sequential consistency in {C\slash C++11}",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "618--632",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062352",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The C/C++11 memory model defines the semantics of
concurrent memory accesses in C/C++, and in particular
supports racy ``atomic'' accesses at a range of
different consistency levels, from very weak
consistency (``relaxed'') to strong, sequential
consistency (``SC''). Unfortunately, as we observe in
this paper, the semantics of SC atomic accesses in
C/C++11, as well as in all proposed strengthenings of
the semantics, is flawed, in that (contrary to
previously published results) both suggested
compilation schemes to the Power architecture are
unsound. We propose a model, called RC11 (for Repaired
C11), with a better semantics for SC accesses that
restores the soundness of the compilation schemes to
Power, maintains the DRF-SC guarantee, and provides
stronger, more useful, guarantees to SC fences. In
addition, we formally prove, for the first time, the
correctness of the proposed stronger compilation
schemes to Power that preserve load-to-store ordering
and avoid ``out-of-thin-air'' reads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
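
For readers who have not seen the SC atomics at issue: in the classic
store-buffering litmus test below, C/C++11 forbids the outcome
r1 == r2 == 0 when all four accesses are memory_order_seq_cst;
guarantees of this kind are what the compilation schemes to Power,
shown unsound in the paper, are required to preserve. A self-contained
sketch, not an example from the paper:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    atomic_int x, y;        /* zero-initialized */
    int r1, r2;

    void *t1(void *arg) {
        atomic_store_explicit(&x, 1, memory_order_seq_cst);
        r1 = atomic_load_explicit(&y, memory_order_seq_cst);
        return NULL;
    }

    void *t2(void *arg) {
        atomic_store_explicit(&y, 1, memory_order_seq_cst);
        r2 = atomic_load_explicit(&x, memory_order_seq_cst);
        return NULL;
    }

    int main(void) {
        pthread_t a, b;
        pthread_create(&a, NULL, t1, NULL);
        pthread_create(&b, NULL, t2, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("r1=%d r2=%d\n", r1, r2); /* 0/0 must be impossible */
        return 0;
    }
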
@Article{Lee:2017:TUB,
author = "Juneyoung Lee and Yoonseung Kim and Youngju Song and
Chung-Kil Hur and Sanjoy Das and David Majnemer and
John Regehr and Nuno P. Lopes",
title = "Taming undefined behavior in {LLVM}",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "633--647",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062343",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A central concern for an optimizing compiler is the
design of its intermediate representation (IR) for
code. The IR should make it easy to perform
transformations, and should also afford efficient and
precise static analysis. In this paper we study an
aspect of IR design that has received little attention:
the role of undefined behavior. The IR for every
optimizing compiler we have looked at, including GCC,
LLVM, Intel's, and Microsoft's, supports one or more
forms of undefined behavior (UB), not only to reflect
the semantics of UB-heavy programming languages such as
C and C++, but also to model inherently unsafe
low-level operations such as memory stores and to avoid
over-constraining IR semantics to the point that
desirable transformations become illegal. The current
semantics of LLVM's IR fails to justify some cases of
loop unswitching, global value numbering, and other
important ``textbook'' optimizations, causing
long-standing bugs. We present solutions to the
problems we have identified in LLVM's IR and show that
most optimizations currently in LLVM remain sound, and
that some desirable new transformations become
permissible. Our solutions do not degrade compile time
or performance of generated code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
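
Loop unswitching, one of the optimizations the abstract names, hoists a
loop-invariant branch out of a loop. Under LLVM's older undef semantics
the transformation was hard to justify when the condition is undefined,
as in this deliberately broken C sketch (illustrative of the problem
class, not the paper's own example):

    #include <stdio.h>

    int work(int n) {
        int c;            /* uninitialized: its value is undefined */
        int sum = 0;
        for (int i = 0; i < n; i++) {
            if (c)        /* loop-invariant branch; unswitching hoists */
                sum += i; /* it out of the loop, and for n == 0 that   */
            else          /* introduces a branch on an undefined value */
                sum -= i; /* the original program never executed       */
        }
        return sum;
    }

    int main(void) { printf("%d\n", work(0)); return 0; }
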
@Article{Schneider:2017:LSM,
author = "Scott Schneider and Kun-Lung Wu",
title = "Low-synchronization, mostly lock-free, elastic
scheduling for streaming runtimes",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "648--661",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062366",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the scalable, elastic operator scheduler in
IBM Streams 4.2. Streams is a distributed stream
processing system used in production at many companies
in a wide range of industries. The programming language
for Streams, SPL, presents operators, tuples and
streams as the primary abstractions. A fundamental SPL
optimization is operator fusion, where multiple
operators execute in the same process. Streams 4.2
introduces automatic submission-time fusion to simplify
application development and deployment. However,
potentially thousands of operators could then execute
in the same process, with no user guidance for thread
placement. We needed a way to automatically figure out
how many threads to use, with arbitrarily sized
applications on a wide variety of hardware, and without
any input from programmers. Our solution has two
components. The first is a scalable operator scheduler
that minimizes synchronization, locks and global data,
while allowing threads to execute any operator and
dynamically come and go. The second is an elastic
algorithm to dynamically adjust the number of threads
to optimize performance, using the principles of
trusted measurements to establish trends. We
demonstrate our scheduler's ability to scale to over a
hundred threads, and our elasticity algorithm's ability
to adapt to different workloads on an Intel Xeon system
with 176 logical cores, and an IBM Power8 system with
184 logical cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Wurthinger:2017:PPE,
author = "Thomas W{\"u}rthinger and Christian Wimmer and
Christian Humer and Andreas W{\"o}{\ss} and Lukas
Stadler and Chris Seaton and Gilles Duboscq and Doug
Simon and Matthias Grimmer",
title = "Practical partial evaluation for high-performance
dynamic language runtimes",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "662--676",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062381",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most high-performance dynamic language virtual
machines duplicate language semantics in the
interpreter, compiler, and runtime system. This
violates the principle of not repeating yourself. In
contrast, we define languages solely by writing an
interpreter. The interpreter performs specializations,
e.g., augments the interpreted program with type
information and profiling information. Compiled code is
derived automatically using partial evaluation while
incorporating these specializations. This makes partial
evaluation practical in the context of dynamic
languages: It reduces the size of the compiled code
while still compiling all parts of an operation that
are relevant for a particular program. When a
speculation fails, execution transfers back to the
interpreter, the program re-specializes in the
interpreter, and later partial evaluation again
transforms the new state of the interpreter to compiled
code. We evaluate our approach by comparing our
implementations of JavaScript, Ruby, and R with
best-in-class specialized production implementations.
Our general-purpose compilation system is competitive
with production systems even when they have been
heavily optimized for the one language they support.
For our set of benchmarks, our speedup relative to the
V8 JavaScript VM is 0.83x, relative to JRuby is 3.8x,
and relative to GNU R is 5x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Muller:2017:RPC,
author = "Stefan K. Muller and Umut A. Acar and Robert Harper",
title = "Responsive parallel computation: bridging competitive
and cooperative threading",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "677--692",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062370",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Competitive and cooperative threading are widely used
abstractions in computing. In competitive threading,
threads are scheduled preemptively with the goal of
minimizing response time, usually of interactive
applications. In cooperative threading, threads are
scheduled non-preemptively with the goal of maximizing
throughput or minimizing the completion time, usually
in compute-intensive applications, e.g. scientific
computing, machine learning and AI. Although both of
these forms of threading rely on the same abstraction
of a thread, they have, to date, remained largely
separate forms of computing. Motivated by the recent
increase in the mainstream use of multicore computers,
we propose a threading model that aims to unify
competitive and cooperative threading. To this end, we
extend the classic graph-based cost model for
cooperative threading to allow for competitive
threading, and describe how such a cost model may be
used in a programming language by presenting a language
and a corresponding cost semantics. Finally, we show
that the cost model and the semantics are realizable by
presenting an operational semantics for the language
that specifies the behavior of an implementation, as
well as an implementation and a small empirical
evaluation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
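
For context, the cooperative cost model being extended is the classic
work--span DAG model, where any greedy schedule on $P$ processors meets
the Brent-style bound (standard background, not a result of this
paper):

    $$ T_P \;\le\; \frac{W}{P} + S, $$

with $W$ the total work and $S$ the span (critical-path length) of the
computation DAG. The paper's cost model layers preemptively scheduled,
response-time-sensitive threads on top of such a DAG model.
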
@Article{Mamouras:2017:SMS,
author = "Konstantinos Mamouras and Mukund Raghothaman and
Rajeev Alur and Zachary G. Ives and Sanjeev Khanna",
title = "{StreamQRE}: modular specification and efficient
evaluation of quantitative queries over streaming
data",
journal = j-SIGPLAN,
volume = "52",
number = "6",
pages = "693--708",
month = jun,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140587.3062369",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Real-time decision making in emerging IoT applications
typically relies on computing quantitative summaries of
large data streams in an efficient and incremental
manner. To simplify the task of programming the desired
logic, we propose StreamQRE, which provides natural and
high-level constructs for processing streaming data.
Our language has a novel integration of linguistic
constructs from two distinct programming paradigms:
streaming extensions of relational query languages and
quantitative extensions of regular expressions. The
former allows the programmer to employ relational
constructs to partition the input data by keys and to
integrate data streams from different sources, while
the latter can be used to exploit the logical hierarchy
in the input stream for modular specifications. We
first present the core language with a small set of
combinators, formal semantics, and a decidable type
system. We then show how to express a number of common
patterns with illustrative examples. Our compilation
algorithm translates the high-level query into a
streaming algorithm with precise complexity bounds on
per-item processing time and total memory footprint. We
also show how to integrate approximation algorithms
into our framework. We report on an implementation in
Java, and evaluate it with respect to existing
high-performance engines for processing streaming data.
Our experimental evaluation shows that (1) StreamQRE
allows more natural and succinct specification of
queries compared to existing frameworks, (2) the
throughput of our implementation is higher than
comparable systems (for example, two-to-four times
greater than RxJava), and (3) the approximation
algorithms supported by our implementation can lead to
substantial memory savings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '17 conference proceedings.",
}
@Article{Weirich:2017:IDT,
author = "Stephanie Weirich",
title = "The influence of dependent types (keynote)",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "1--1",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009923",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "What has dependent type theory done for Haskell? In
this talk, I will discuss the influence of dependent
types on the design of programming languages and on the
practice of functional programmers. Over the past ten
years, the Glasgow Haskell compiler has adopted several
type system features inspired by dependent type theory.
However, this process has not been a direct
translation; working in the context of an existing
language has led us to new designs in the semantics of
dependent types. I will take a close look at what we
have achieved in GHC and discuss what we have learned
from this experiment: what works now, what doesn't work
yet, and what has surprised us along the way.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Turon:2017:RPP,
author = "Aaron Turon",
title = "{Rust}: from {POPL} to practice (keynote)",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "2--2",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3011999",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In 2015, a language based fundamentally on
substructural typing --- Rust --- hit its 1.0 release,
and less than a year later it has been put into
production use in a number of tech companies, including
some household names. The language has started a trend,
with several other mainstream languages, including C++
and Swift, in the early stages of incorporating ideas
about ownership. How did this come about? Rust's core
focus is safe systems programming. It does not require
a runtime system or garbage collector, but guarantees
memory safety. It does not stipulate any particular
style of concurrent programming, but instead provides
the tools needed to guarantee data race freedom even
when doing low-level shared-state concurrency. It
allows you to build up high-level abstractions without
paying a tax; its compilation model ensures that the
abstractions boil away. These benefits derive from two
core aspects of Rust: its ownership system (based on
substructural typing) and its trait system (a
descendant of Haskell's typeclasses). The talk will
cover these two pillars of Rust design, with particular
attention to the key innovations that make the language
usable at scale. It will highlight the implications for
concurrency, where Rust provides a unique perspective.
It will also touch on aspects of Rust's development
that tend to get less attention within the POPL
community: Rust's governance and open development
process, and design considerations around language and
library evolution. Finally, it will mention a few of
the myriad open research questions around Rust.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Alglave:2017:OPI,
author = "Jade Alglave and Patrick Cousot",
title = "Ogre and {Pythia}: an invariance proof method for weak
consistency models",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "3--18",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009883",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We design an invariance proof method for concurrent
programs parameterised by a weak consistency model. The
calculational design of the invariance proof method is
by abstract interpretation of a truly parallel analytic
semantics. This generalises the methods by Lamport and
Owicki-Gries for sequential consistency. We use cat as
an example of a language for writing consistency
specifications of both concurrent programs and machine
architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Germane:2017:PEA,
author = "Kimball Germane and Matthew Might",
title = "A posteriori environment analysis with {Pushdown Delta
CFA}",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "19--31",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009899",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Flow-driven higher-order inlining is blocked by free
variables, yet current theories of environment analysis
cannot reliably cope with multiply-bound variables. One
of these, $\Delta$CFA, is a promising theory based on
stack change but is undermined by its finite-state
model of the stack. We present Pushdown $\Delta$CFA,
which takes a $\Delta$CFA approach to pushdown models
of control flow and can cope with multiply-bound
variables, even in the face of recursion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Li:2017:SDC,
author = "Huisong Li and Fran{\c{c}}ois Berenger and Bor-Yuh
Evan Chang and Xavier Rival",
title = "Semantic-directed clumping of disjunctive abstract
states",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "32--45",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009881",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To infer complex structural invariants, shape analyses
rely on expressive families of logical properties. Many
such analyses manipulate abstract memory states that
consist of separating conjunctions of basic predicates
describing atomic blocks or summaries. Moreover, they
use finite disjunctions of abstract memory states in
order to account for dissimilar shapes. Disjunctions
should be kept small for the sake of scalability,
though precision often requires keeping additional case
splits. In this context, deciding when and how to merge
case splits and to replace them with summaries is
critical both for the precision and for the efficiency.
Existing techniques use sets of syntactic rules, which
are tedious to design and prone to failure. In this
paper, we design a semantic criterion to clump abstract
states based on their silhouette which applies not only
to the conservative union of disjuncts, but also to the
weakening of separating conjunction of memory
predicates into inductive summaries. Our approach
allows us to define union and widening operators that aim
at preserving the case splits that are required for the
analysis to succeed. We implement this approach in the
MemCAD analyzer, and evaluate it on real-world C code
from existing libraries, including programs dealing
with doubly linked lists, red-black trees and
AVL-trees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Singh:2017:FPA,
author = "Gagandeep Singh and Markus P{\"u}schel and Martin
Vechev",
title = "Fast polyhedra abstract domain",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "46--59",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009885",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Numerical abstract domains are an important ingredient
of modern static analyzers used for verifying critical
program properties (e.g., absence of buffer overflow or
memory safety). Among the many numerical domains
introduced over the years, Polyhedra is the most
expressive one, but also the most expensive: it has
worst-case exponential space and time complexity. As a
consequence, static analysis with the Polyhedra domain
is thought to be impractical when applied to large
scale, real world programs. In this paper, we present a
new approach and a complete implementation for speeding
up Polyhedra domain analysis. Our approach does not
lose precision, and for many practical cases, is orders
of magnitude faster than state-of-the-art solutions.
The key insight underlying our work is that polyhedra
arising during analysis can usually be kept decomposed,
thus considerably reducing the overall complexity. We
first present the theory underlying our approach, which
identifies the interaction between partitions of
variables and domain operators. Based on the theory we
develop new algorithms for these operators that work
with decomposed polyhedra. We implemented these
algorithms using the same interface as existing
libraries, thus enabling static analyzers to use our
implementation with little effort. In our evaluation,
we analyze large benchmarks from the popular software
verification competition, including Linux device
drivers with over 50K lines of code. Our experimental
results demonstrate massive gains in both space and
time: we show end-to-end speedups of two to five orders
of magnitude compared to state-of-the-art Polyhedra
implementations as well as significant memory gains, on
all larger benchmarks. In fact, in many cases our
analysis terminates in seconds where prior code runs
out of memory or times out after 4 hours. We believe
this work is an important step in making the Polyhedra
abstract domain both feasible and practically usable
for handling large, real-world programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
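
A toy instance of the decomposition insight (illustrative, not from the
paper): the polyhedron

    $$ \{\, x_1 \le x_2,\; x_2 \le 3,\; x_3 = 2 x_4 \,\} $$

factors over the variable partition $\{x_1, x_2\} \uplus \{x_3, x_4\}$,
so joins, widenings, and projections can run blockwise on two small
polyhedra rather than on one four-dimensional one; the contribution is
showing how every domain operator can maintain such partitions.
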
@Article{Dolan:2017:PST,
author = "Stephen Dolan and Alan Mycroft",
title = "Polymorphism, subtyping, and type inference in
{MLsub}",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "60--72",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009882",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a type system combining subtyping and
ML-style parametric polymorphism. Unlike previous work,
our system supports type inference and has compact
principal types. We demonstrate this system in the
minimal language MLsub, which types a strict superset
of core ML programs. This is made possible by keeping a
strict separation between the types used to describe
inputs and those used to describe outputs, and
extending the classical unification algorithm to handle
subtyping constraints between these input and output
types. Principal types are kept compact by type
simplification, which exploits deep connections between
subtyping and the algebra of regular languages. An
implementation is available online.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
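
A flavor of the input/output separation (an illustrative typing in the
spirit of MLsub; the notation, with $\sqcup$ the least upper bound on
output types, is assumed rather than quoted):

    $$ \lambda x.\, \lambda y.\; \mathsf{if}\ b\ \mathsf{then}\ x\
       \mathsf{else}\ y \;:\; \alpha \to \beta \to (\alpha \sqcup \beta) $$

The join occurs only in output position, which is what keeps principal
types compact and inference unification-like.
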
@Article{Grigore:2017:JGT,
author = "Radu Grigore",
title = "{Java} generics are {Turing} complete",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "73--85",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009871",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes a reduction from the halting
problem of Turing machines to subtype checking in Java.
It follows that subtype checking in Java is
undecidable, which answers a question posed by Kennedy
and Pierce in 2007. It also follows that Java's type
checker can recognize any recursive language, which
improves a result of Gil and Levy from 2016. The
latter point is illustrated by a parser generator for
fluent interfaces.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
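
The reduction's engine is contravariance meeting recursive inheritance.
Schematically, given class N<T>, class C<T> extends
N<N<? super C<C<T>>>>, and any class B, a single subtype query
regenerates itself with a larger argument (a sketch of the well-known
divergence pattern, not the paper's exact construction):

    $$ \texttt{C<B>} \le \texttt{N<? super C<B>>}
       \;\Longleftrightarrow\;
       \texttt{C<B>} \le \texttt{N<? super C<C<B>>>}
       \;\Longleftrightarrow\; \cdots $$

Each step substitutes into C's superclass and then flips direction at
the contravariant wildcard, so the expansion never terminates; the
paper harnesses this machinery to simulate Turing machines.
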
@Article{Omar:2017:HBT,
author = "Cyrus Omar and Ian Voysey and Michael Hilton and
Jonathan Aldrich and Matthew A. Hammer",
title = "{Hazelnut}: a bidirectionally typed structure editor
calculus",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "86--99",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009900",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Structure editors allow programmers to edit the tree
structure of a program directly. This can have
cognitive benefits, particularly for novice and
end-user programmers. It also simplifies matters for
tool designers, because they do not need to contend
with malformed program text. This paper introduces
Hazelnut, a structure editor based on a small
bidirectionally typed lambda calculus extended with
holes and a cursor. Hazelnut goes one step beyond
syntactic well-formedness: its edit actions operate
over statically meaningful incomplete terms.
Na{\"\i}vely, this would force the programmer to
construct terms in a rigid ``outside-in'' manner. To
avoid this problem, the action semantics automatically
places terms assigned a type that is inconsistent with
the expected type inside a hole. This meaningfully
defers the type consistency check until the term inside
the hole is finished. Hazelnut is not intended as an
end-user tool itself. Instead, it serves as a
foundational account of typed structure editing. To
that end, we describe how Hazelnut's rich metatheory,
which we have mechanized using the Agda proof
assistant, serves as a guide when we extend the
calculus to include binary sum types. We also discuss
various interpretations of holes, and in so doing
reveal connections with gradual typing and contextual
modal type theory, the Curry--Howard interpretation of
contextual modal logic. Finally, we discuss how
Hazelnut's semantics lends itself to implementation as
an event-based functional reactive program. Our simple
reference implementation is written using
js\_of\_ocaml.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Crary:2017:MAP,
author = "Karl Crary",
title = "Modules, abstraction, and parametric polymorphism",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "100--113",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009892",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reynolds's Abstraction theorem forms the mathematical
foundation for data abstraction. His setting was the
polymorphic lambda calculus. Today, many modern
languages, such as the ML family, employ rich module
systems designed to give more expressive support for
data abstraction than the polymorphic lambda calculus,
but analogues of the Abstraction theorem for such
module systems have lagged far behind. We give an
account of the Abstraction theorem for a modern module
calculus supporting generative and applicative
functors, higher-order functors, sealing, and
translucent signatures. The main issues to be overcome
are: (1) the fact that modules combine both types and
terms, so they must be treated as both simultaneously,
(2) the effect discipline that models the distinction
between transparent and opaque modules, and (3) a very
rich language of type constructors supporting singleton
kinds. We define logical equivalence for modules and
show that it coincides with contextual equivalence.
This substantiates the folk theorem that modules are
good for data abstraction. All our proofs are
formalized in Coq.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Lampropoulos:2017:BLL,
author = "Leonidas Lampropoulos and Diane Gallois-Wong and
Catalin Hritcu and John Hughes and Benjamin C. Pierce
and Li-yao Xia",
title = "Beginner's luck: a language for property-based
generators",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "114--129",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009868",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Property-based random testing {\`a} la QuickCheck
requires building efficient generators for
well-distributed random data satisfying complex logical
predicates, but writing these generators can be
difficult and error prone. We propose a domain-specific
language in which generators are conveniently expressed
by decorating predicates with lightweight annotations
to control both the distribution of generated values
and the amount of constraint solving that happens
before each variable is instantiated. This language,
called Luck, makes generators easier to write, read,
and maintain. We give Luck a formal semantics and prove
several fundamental properties, including the soundness
and completeness of random generation with respect to a
standard predicate semantics. We evaluate Luck on
common examples from the property-based testing
literature and on two significant case studies, showing
that it can be used in complex domains with comparable
bug-finding effectiveness and a significant reduction
in testing code size compared to handwritten
generators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Shan:2017:EBI,
author = "Chung-chieh Shan and Norman Ramsey",
title = "Exact {Bayesian} inference by symbolic
disintegration",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "130--144",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009852",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Bayesian inference, of posterior knowledge from prior
knowledge and observed evidence, is typically defined
by Bayes's rule, which says the posterior multiplied by
the probability of an observation equals a joint
probability. But the observation of a continuous
quantity usually has probability zero, in which case
Bayes's rule says only that the unknown times zero is
zero. To infer a posterior distribution from a
zero-probability observation, the statistical notion of
disintegration tells us to specify the observation as
an expression rather than a predicate, but does not
tell us how to compute the posterior. We present the
first method of computing a disintegration from a
probabilistic program and an expression of a quantity
to be observed, even when the observation has
probability zero. Because the method produces an exact
posterior term and preserves a semantics in which
monadic terms denote measures, it composes with other
inference methods in a modular way --- without
sacrificing accuracy or performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
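
A textbook instance of the zero-probability-observation problem (not
the paper's example): with prior $X \sim \mathcal{N}(0,1)$ and
likelihood $Y \mid X \sim \mathcal{N}(X,1)$, the event $Y = y_0$ has
probability zero, yet disintegrating the joint measure along $Y$
recovers the conjugate posterior

    $$ X \mid Y{=}y_0 \;\sim\;
       \mathcal{N}\!\bigl(\tfrac{y_0}{2},\, \tfrac{1}{2}\bigr), $$

which is the kind of exact result the symbolic procedure is designed to
produce from a probabilistic program plus an observed expression.
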
@Article{Chatterjee:2017:SIP,
author = "Krishnendu Chatterjee and Petr Novotn{\'y} and
{\DJ}or{\dj}e {\v{Z}}ikeli{\'c}",
title = "Stochastic invariants for probabilistic termination",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "145--160",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009873",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Termination is one of the basic liveness properties,
and we study the termination problem for probabilistic
programs with real-valued variables. Previous works
focused on the qualitative problem that asks whether an
input program terminates with probability~1
(almost-sure termination). A powerful approach for this
qualitative problem is the notion of ranking
supermartingales with respect to a given set of
invariants. The quantitative problem (probabilistic
termination) asks for bounds on the termination
probability, and this problem has not been addressed
yet. A fundamental and conceptual drawback of the
existing approaches to address probabilistic
termination is that even though the supermartingales
consider the probabilistic behaviour of the programs,
the invariants are obtained completely ignoring the
probabilistic aspect (i.e., the invariants are obtained
considering all behaviours with no information about
the probability). In this work we address the
probabilistic termination problem for linear-arithmetic
probabilistic programs with nondeterminism. We formally
define the notion of stochastic invariants, which are
constraints along with a probability bound that the
constraints hold. We introduce a concept of repulsing
supermartingales. First, we show that repulsing
supermartingales can be used to obtain bounds on the
probability of the stochastic invariants. Second, we
show the effectiveness of repulsing supermartingales in
the following three ways: (1)~with a combination
of ranking and repulsing supermartingales we can
compute lower bounds on the probability of termination;
(2)~repulsing supermartingales provide witnesses
for refutation of almost-sure termination; and
(3)~with a combination of ranking and repulsing
supermartingales we can establish persistence
properties of probabilistic programs. Along with our
conceptual contributions, we establish the following
computational results: First, the synthesis of a
stochastic invariant which supports some ranking
supermartingale and at the same time admits a repulsing
supermartingale can be achieved via reduction to the
existential first-order theory of reals, which
generalizes existing results from the non-probabilistic
setting. Second, given a program with ``strict
invariants'' (e.g., obtained via abstract
interpretation) and a stochastic invariant, we can
check in polynomial time whether there exists a linear
repulsing supermartingale w.r.t. the stochastic
invariant (via reduction to LP). We also present
experimental evaluation of our approach on academic
examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
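
A minimal stochastic invariant (illustrative; not from the paper): for
the walk that sets x := x + 1 with probability 3/4 and x := x - 1 with
probability 1/4, started at x = 1, gambler's-ruin analysis gives

    $$ \Pr[\,\exists n.\; x_n = 0\,] \;=\; \frac{1/4}{3/4}
       \;=\; \frac{1}{3}, $$

so the pair $(x \ge 1,\ 1/3)$ is a stochastic invariant: the constraint
$x \ge 1$ is violated with probability at most $1/3$. Repulsing
supermartingales are the certificates that establish such bounds.
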
@Article{Barthe:2017:CPP,
author = "Gilles Barthe and Benjamin Gr{\'e}goire and Justin Hsu
and Pierre-Yves Strub",
title = "Coupling proofs are probabilistic product programs",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "161--174",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009896",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Couplings are a powerful mathematical tool for
reasoning about pairs of probabilistic processes.
Recent developments in formal verification identify a
close connection between couplings and pRHL, a
relational program logic motivated by applications to
provable security, enabling formal construction of
couplings from the probability theory literature.
However, existing work using pRHL merely shows
existence of a coupling and does not give a way to
prove quantitative properties about the coupling,
needed to reason about mixing and convergence of
probabilistic processes. Furthermore, pRHL is
inherently incomplete, and is not able to capture some
advanced forms of couplings such as shift couplings. We
address both problems as follows. First, we define an
extension of pRHL, called x-pRHL, which explicitly
constructs the coupling in a pRHL derivation in the
form of a probabilistic product program that simulates
two correlated runs of the original program. Existing
verification tools for probabilistic programs can then
be directly applied to the probabilistic product to
prove quantitative properties of the coupling. Second,
we equip x-pRHL with a new rule for while loops, where
reasoning can freely mix synchronized and
unsynchronized loop iterations. Our proof rule can
capture examples of shift couplings, and the logic is
relatively complete for deterministic programs. We show
soundness of x-pRHL and use it to analyze two classes
of examples. First, we verify rapid mixing using
different tools from coupling: standard coupling, shift
coupling, and path coupling, a compositional principle
for combining local couplings into a global coupling.
Second, we verify (approximate) equivalence between a
source and an optimized program for several instances
of loop optimizations from the literature.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
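
The simplest shape of the construction (toy illustration, not from the
paper): to relate two runs of a fair coin flip, the probabilistic
product program samples once and feeds both sides,

    $$ z \sim \mathrm{Bern}(\tfrac12); \quad x_1 := z; \quad x_2 := z, $$

an identity coupling under which $\Pr[x_1 \ne x_2] = 0$; standard
(non-relational) verification applied to this product then certifies
the equivalence of the two original programs.
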
@Article{Kang:2017:PSR,
author = "Jeehoon Kang and Chung-Kil Hur and Ori Lahav and
Viktor Vafeiadis and Derek Dreyer",
title = "A promising semantics for relaxed-memory concurrency",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "175--189",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009850",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite many years of research, it has proven very
difficult to develop a memory model for concurrent
programming languages that adequately balances the
conflicting desiderata of programmers, compilers, and
hardware. In this paper, we propose the first relaxed
memory model that (1) accounts for a broad spectrum of
features from the C++11 concurrency model, (2) is
implementable, in the sense that it provably validates
many standard compiler optimizations and reorderings,
as well as standard compilation schemes to x86-TSO and
Power, (3) justifies simple invariant-based reasoning,
thus demonstrating the absence of bad
``out-of-thin-air'' behaviors, (4) supports ``DRF''
guarantees, ensuring that programmers who use
sufficient synchronization need not understand the full
complexities of relaxed-memory semantics, and (5)
defines the semantics of racy programs without relying
on undefined behaviors, which is a prerequisite for
applicability to type-safe languages like Java. The key
novel idea behind our model is the notion of
*promises*: a thread may promise to execute a write in
the future, thus enabling other threads to read from
that write out of order. Crucially, to prevent
out-of-thin-air behaviors, a promise step requires a
thread-local certification that it will be possible to
execute the promised write even in the absence of the
promise. To establish confidence in our model, we have
formalized most of our key results in Coq.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
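
The motivating behavior is load buffering with relaxed atomics: in the
C11 sketch below the outcome r1 == r2 == 1 must be allowed, since
hardware and compilers may reorder each thread's two accesses, yet
values must not appear out of thin air. A promise is a certified early
write that permits the former while excluding the latter. Illustrative
litmus test, not from the paper:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    atomic_int x, y;        /* zero-initialized */
    int r1, r2;

    void *t1(void *arg) {   /* r1 = x; y = 1;  (both relaxed) */
        r1 = atomic_load_explicit(&x, memory_order_relaxed);
        atomic_store_explicit(&y, 1, memory_order_relaxed);
        return NULL;
    }

    void *t2(void *arg) {   /* r2 = y; x = 1;  (both relaxed) */
        r2 = atomic_load_explicit(&y, memory_order_relaxed);
        atomic_store_explicit(&x, 1, memory_order_relaxed);
        return NULL;
    }

    int main(void) {
        pthread_t a, b;
        pthread_create(&a, NULL, t1, NULL);
        pthread_create(&b, NULL, t2, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        /* 1/1 is allowed; 42/42 would be out of thin air */
        printf("r1=%d r2=%d\n", r1, r2);
        return 0;
    }
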
@Article{Wickerson:2017:ACM,
author = "John Wickerson and Mark Batty and Tyler Sorensen and
George A. Constantinides",
title = "Automatically comparing memory consistency models",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "190--204",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009838",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A memory consistency model (MCM) is the part of a
programming language or computer architecture
specification that defines which values can legally be
read from shared memory locations. Because MCMs take
into account various optimisations employed by
architectures and compilers, they are often complex and
counterintuitive, which makes them challenging to
design and to understand. We identify four tasks
involved in designing and understanding MCMs:
generating conformance tests, distinguishing two MCMs,
checking compiler optimisations, and checking compiler
mappings. We show that all four tasks are instances of
a general constraint-satisfaction problem to which the
solution is either a program or a pair of programs.
Although this problem is intractable for automatic
solvers when phrased over programs directly, we show
how to solve analogous constraints over program
executions, and then construct programs that satisfy
the original constraints. Our technique, which is
implemented in the Alloy modelling framework, is
illustrated on several software- and architecture-level
MCMs, both axiomatically and operationally defined. We
automatically recreate several known results, often in
a simpler form, including: distinctions between
variants of the C11 MCM; a failure of the `SC-DRF
guarantee' in an early C11 draft; that x86 is
`multi-copy atomic' and Power is not; bugs in common
C11 compiler optimisations; and bugs in a compiler
mapping from OpenCL to AMD-style GPUs. We also use our
technique to develop and validate a new MCM for NVIDIA
GPUs that supports a natural mapping from OpenCL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Krebbers:2017:IPH,
author = "Robbert Krebbers and Amin Timany and Lars Birkedal",
title = "Interactive proofs in higher-order concurrent
separation logic",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "205--217",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009855",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When using a proof assistant to reason in an embedded
logic --- like separation logic --- one cannot benefit
from the proof contexts and basic tactics of the proof
assistant. This results in proofs that are at too low a
level of abstraction because they are cluttered with
bookkeeping code related to manipulating the object
logic. In this paper, we introduce a so-called proof
mode that extends the Coq proof assistant with (spatial
and non-spatial) named proof contexts for the object
logic. We show that thanks to these contexts we can
implement high-level tactics for introduction and
elimination of the connectives of the object logic, and
thereby make reasoning in the embedded logic as
seamless as reasoning in the meta logic of the proof
assistant. We apply our method to Iris: a
state-of-the-art higher-order impredicative concurrent
separation
logic. We show that our method is very general, and is
not just limited to program verification. We
demonstrate its generality by formalizing correctness
proofs of fine-grained concurrent algorithms, derived
constructs of the Iris logic, and a unary and binary
logical relation for a language with concurrency,
higher-order store, polymorphism, and recursive types.
This is the first formalization of a binary logical
relation for such an expressive language. We also show
how to use the logical relation to prove contextual
refinement of fine-grained concurrent algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Krogh-Jespersen:2017:RMT,
author = "Morten Krogh-Jespersen and Kasper Svendsen and Lars
Birkedal",
title = "A relational model of types-and-effects in
higher-order concurrent separation logic",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "218--231",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009877",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recently we have seen a renewed interest in
programming languages that tame the complexity of state
and concurrency through refined type systems with more
fine-grained control over effects. In addition to
simplifying reasoning and eliminating whole classes of
bugs, statically tracking effects opens the door to
advanced compiler optimizations. In this paper we
present a relational model of a type-and-effect system
for a higher-order, concurrent programming language.
The model precisely captures the semantic invariants
expressed by the effect annotations. We demonstrate
that these invariants are strong enough to prove
advanced program transformations, including automatic
parallelization of expressions with suitably disjoint
effects. The model also supports refinement proofs
between abstract data type implementations with
different internal data representations, including
proofs that fine-grained concurrent algorithms refine
their coarse-grained counterparts. This is the first
model for such an expressive language that supports
both effect-based optimizations and data abstraction.
The logical relation is defined in Iris, a
state-of-the-art higher-order concurrent separation
logic. This greatly simplifies proving well-definedness
of the logical relation and also provides us with a
powerful logic for reasoning in the model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{DAntoni:2017:MSO,
author = "Loris D'Antoni and Margus Veanes",
title = "Monadic second-order logic on finite sequences",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "232--245",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009844",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We extend the weak monadic second-order logic of one
successor on finite strings (M2L-STR) to symbolic
alphabets by allowing character predicates to range
over decidable quantifier free theories instead of
finite alphabets. We call this logic, which is able to
describe sequences over complex and potentially
infinite domains, symbolic M2L-STR (S-M2L-STR). We then
present a decision procedure for S-M2L-STR based on a
reduction to symbolic finite automata, a decidable
extension of finite automata that allows transitions to
carry predicates and can therefore model symbolic
alphabets. The reduction constructs a symbolic
automaton over an alphabet consisting of pairs of
symbols where the first element of the pair is a symbol
in the original formula's alphabet, while the second
element is a bit-vector. To handle this modified
alphabet we show that the Cartesian product of two
decidable Boolean algebras (e.g., the formula's one and
the bit-vector's one) also forms a decidable Boolean
algebras. To make the decision procedure practical, we
propose two efficient representations of the Cartesian
product of two Boolean algebras, one based on algebraic
decision diagrams and one on a variant of Shannon
expansions. Finally, we implement our decision
procedure and evaluate it on more than 10,000 formulas.
Despite its generality, our implementation performs
comparably to state-of-the-art M2L-STR solvers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
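
An example of what becomes expressible once character predicates range
over a theory instead of a finite alphabet (an invented formula in the
spirit of S-M2L-STR; $\mathit{char}(x)$ denotes the symbol at position
$x$):

    $$ \forall x.\; \mathit{char}(x) < 0 \;\rightarrow\;
       \exists y.\; x < y \,\wedge\, \mathit{char}(y) = 0 $$

read over finite sequences of integers: every position holding a
negative value is later followed by a position holding zero.
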
@Article{Kobayashi:2017:RBH,
author = "Naoki Kobayashi and {\'E}tienne Lozes and Florian
Bruse",
title = "On the relationship between higher-order recursion
schemes and higher-order fixpoint logic",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "246--259",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009854",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We study the relationship between two kinds of
higher-order extensions of model checking: HORS model
checking, where models are extended to higher-order
recursion schemes, and HFL model checking, where the
logic is extended to higher-order modal fixpoint logic.
Those extensions have been independently studied until
recently, and the former has been applied to
higher-order program verification. We show that there
exist (arguably) natural reductions between the two
problems. To prove the correctness of the translation
from HORS to HFL model checking, we establish a
type-based characterization of HFL model checking,
which should be of independent interest. The results
reveal a close relationship between the two problems,
enabling cross-fertilization of the two research
threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Kovacs:2017:CTQ,
author = "Laura Kov{\'a}cs and Simon Robillard and Andrei
Voronkov",
title = "Coming to terms with quantified reasoning",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "260--270",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009887",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The theory of finite term algebras provides a natural
framework to describe the semantics of functional
languages. The ability to efficiently reason about term
algebras is essential to automate program analysis and
verification for functional or imperative programs over
inductively defined data types such as lists and trees.
However, as the theory of finite term algebras is not
finitely axiomatizable, reasoning about quantified
properties over term algebras is challenging. In this
paper we address full first-order reasoning about
properties of programs manipulating term algebras, and
describe two approaches for doing so by using
first-order theorem proving. Our first method is a
conservative extension of the theory of term algebras
using a finite number of statements, while our second
method relies on extending the superposition calculus
of first-order theorem provers with additional
inference rules. We implemented our work in the
first-order theorem prover Vampire and evaluated it on
a large number of inductive datatype benchmarks, as
well as game theory constraints. Our experimental
results show that our methods are able to find proofs
for many hard problems previously unsolved by
state-of-the-art methods. We also show that Vampire
implementing our methods outperforms existing SMT
solvers able to deal with inductive data types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
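
The non-finite-axiomatizability mentioned here is driven by acyclicity:
injectivity and distinctness of constructors are single axioms, but
excluding cycles takes one axiom per depth (standard background for a
list-like term algebra, not the paper's formulation):

    $$ \mathit{cons}(x, xs) \ne \mathit{nil}, \qquad
       \mathit{cons}(x, xs) = \mathit{cons}(y, ys) \rightarrow
       x = y \wedge xs = ys, $$
    $$ xs \ne \mathit{cons}(x_1, xs), \qquad
       xs \ne \mathit{cons}(x_1, \mathit{cons}(x_2, xs)), \qquad \ldots $$

The paper's two approaches are different ways of letting a first-order
prover reason effectively despite this infinite schema.
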
@Article{Scully:2017:POA,
author = "Ziv Scully and Adam Chlipala",
title = "A program optimization for automatic database result
caching",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "271--284",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009891",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most popular Web applications rely on persistent
databases based on languages like SQL for declarative
specification of data models and the operations that
read and modify them. As applications scale up in user
base, they often face challenges responding quickly
enough to the high volume of requests. A common aid is
caching of database results in the application's memory
space, taking advantage of program-specific knowledge
of which caching schemes are sound and useful, embodied
in handwritten modifications that make the program less
maintainable. These modifications also require
nontrivial reasoning about the read-write dependencies
across operations. In this paper, we present a compiler
optimization that automatically adds sound SQL caching
to Web applications coded in the Ur/Web domain-specific
functional language, with no modifications required to
source code. We use a custom cache implementation that
supports concurrent operations without compromising the
transactional semantics of the database abstraction.
Through experiments with microbenchmarks and production
Ur/Web applications, we show that our optimization in
many cases enables an easy doubling or more of an
application's throughput, requiring nothing more than
passing an extra command-line flag to the compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Kiselyov:2017:SFC,
author = "Oleg Kiselyov and Aggelos Biboudis and Nick Palladinos
and Yannis Smaragdakis",
title = "Stream fusion, to completeness",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "285--299",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009880",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Stream processing is mainstream (again): Widely-used
stream libraries are now available for virtually all
modern OO and functional languages, from Java to C\# to
Scala to OCaml to Haskell. Yet expressivity and
performance are still lacking. For instance, the
popular, well-optimized Java 8 streams do not support
the zip operator and are still an order of magnitude
slower than hand-written loops. We present the first
approach that represents the full generality of stream
processing and eliminates overheads, via the use of
staging. It is based on an unusually rich semantic
model of stream interaction. We support any combination
of zipping, nesting (or flat-mapping), sub-ranging,
filtering, and mapping of finite or infinite streams. Our
model captures idiosyncrasies that a programmer uses in
optimizing stream pipelines, such as rate differences
and the choice of ``for'' vs. ``while'' loops. Our
approach delivers hand-written-like code, but
automatically. It explicitly avoids the reliance on
black-box optimizers and sufficiently-smart compilers,
offering highest, guaranteed and portable performance.
Our approach relies on high-level concepts that are
then readily mapped into an implementation.
Accordingly, we have two distinct implementations: an
OCaml stream library, staged via MetaOCaml, and a Scala
library for the JVM, staged via LMS. In both cases, we
derive libraries richer and simultaneously many tens of
times faster than past work. We greatly exceed in
performance the standard stream libraries available in
Java, Scala and OCaml, including the well-optimized
Java 8 streams.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
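
What ``fusion to completeness'' should produce operationally: a
pipeline such as the sum of squares of the odd numbers below n compiles
to one loop with no closures or intermediate streams, on the order of
the hand-written C below (an illustrative target, not output of the
paper's libraries):

    #include <stdio.h>

    /* The fused form of: sum (map square (filter odd [0 .. n))) */
    long fused_sum(long n) {
        long acc = 0;
        for (long i = 0; i < n; i++)
            if (i % 2 == 1)    /* filter odd */
                acc += i * i;  /* map square, then sum */
        return acc;
    }

    int main(void) {
        printf("%ld\n", fused_sum(10)); /* 1 + 9 + 25 + 49 + 81 = 165 */
        return 0;
    }
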
@Article{Chiang:2017:RFP,
author = "Wei-Fan Chiang and Mark Baranowski and Ian Briggs and
Alexey Solovyev and Ganesh Gopalakrishnan and Zvonimir
Rakamari{\'c}",
title = "Rigorous floating-point mixed-precision tuning",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "300--315",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009846",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Virtually all real-valued computations are carried out
using floating-point data types and operations. The
precision of these data types must be set with the
goals of reducing the overall round-off error, but also
emphasizing performance improvements. Often, a
mixed-precision allocation achieves this optimum;
unfortunately, there are no techniques available to
compute such allocations and conservatively meet a
given error target across all program inputs. In this
work, we present a rigorous approach to precision
allocation based on formal analysis via Symbolic Taylor
Expansions, and error analysis based on interval
functions. This approach is implemented in an automated
tool called FPTuner that generates and solves a
quadratically constrained quadratic program to obtain a
precision-annotated version of the given expression.
FPTuner automatically introduces all the requisite
precision up and down casting operations. It also
allows users to flexibly control precision allocation
using constraints to cap the number of high precision
operators as well as group operators to allocate the
same precision to facilitate vectorization. We evaluate
FPTuner by tuning several benchmarks and measuring the
proportion of lower precision operators allocated as we
increase the error threshold. We also measure the
reduction in energy consumption resulting from
executing mixed-precision tuned code on a real hardware
platform. We observe significant energy savings in
response to mixed-precision tuning, but also observe
situations where unexpected compiler behaviors thwart
intended optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
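A mixed-precision allocation, in the abstract's sense, assigns a
precision to each operator rather than to the whole program. FPTuner
chooses that assignment by solving a quadratically constrained
quadratic program; the toy sketch below (ours, not the tool) only
demonstrates the object being optimized, simulating a 32-bit operator
inside OCaml's 64-bit floats by round-tripping through the IEEE-754
single-precision bit pattern.

    (* Demote a 64-bit value to single precision and back. *)
    let to_single x = Int32.float_of_bits (Int32.bits_of_float x)

    (* Evaluate x*y + z with a chosen precision per operator. *)
    let fma ~mul_single ~add_single x y z =
      let r = x *. y in
      let r = if mul_single then to_single r else r in
      let s = r +. z in
      if add_single then to_single s else s

    let () =
      let x, y, z = 1.1, 2.2, 3.3 in
      let hi = fma ~mul_single:false ~add_single:false x y z in
      let lo = fma ~mul_single:true ~add_single:true x y z in
      Printf.printf "high %.17g  mixed %.17g  error %.3g\n"
        hi lo (abs_float (hi -. lo))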
@Article{Cicek:2017:RCA,
author = "Ezgi {\c{C}}i{\c{c}}ek and Gilles Barthe and Marco
Gaboardi and Deepak Garg and Jan Hoffmann",
title = "Relational cost analysis",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "316--329",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009858",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Establishing quantitative bounds on the execution cost
of programs is essential in many areas of computer
science such as complexity analysis, compiler
optimizations, security and privacy. Techniques based
on program analysis, type systems and abstract
interpretation are well-studied, but methods for
analyzing how the execution costs of two programs
compare to each other have not received attention.
Naively combining the worst and best case execution
costs of the two programs does not work well in many
cases because such analysis forgets the similarities
between the programs or the inputs. In this work, we
propose a relational cost analysis technique that is
capable of establishing precise bounds on the
difference in the execution cost of two programs by
making use of relational properties of programs and
                 inputs. We develop RelCost, a refinement type and effect
system for a higher-order functional language with
recursion and subtyping. The key novelty of our
technique is the combination of relational refinements
with two modes of typing --- relational typing for
reasoning about similar computations/inputs and unary
typing for reasoning about unrelated
computations/inputs. This combination allows us to
analyze the execution cost difference of two programs
more precisely than a naive non-relational approach. We
prove our type system sound using a semantic model
based on step-indexed unary and binary logical
relations accounting for non-relational and relational
reasoning principles with their respective costs. We
demonstrate the precision and generality of our
technique through examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
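The relational question is easy to state concretely: instrument two
programs with a step counter and bound the difference of the counts.
The sketch below (ours, not the RelCost type system) measures that
difference dynamically in the paper's motivating situation, two runs
of the same structurally recursive function on inputs of equal size,
where the difference is exactly zero even though a naive
worst-minus-best bound is linear.

    let steps = ref 0
    let tick () = incr steps
    let cost f = steps := 0; ignore (f ()); !steps

    (* one tick per cons cell traversed *)
    let rec map_t f = function
      | [] -> []
      | x :: xs -> tick (); f x :: map_t f xs

    let () =
      let c1 = cost (fun () -> map_t succ [1; 2; 3; 4])
      and c2 = cost (fun () -> map_t succ [9; 8; 7; 6]) in
      (* a relational analysis proves |c1 - c2| = 0 for any two
         inputs of equal length, without running anything *)
      assert (c1 = c2)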
@Article{Madhavan:2017:CBR,
author = "Ravichandhran Madhavan and Sumith Kulal and Viktor
Kuncak",
title = "Contract-based resource verification for higher-order
functions with memoization",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "330--343",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009874",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a new approach for specifying and verifying
resource utilization of higher-order functional
programs that use lazy evaluation and memoization. In
our approach, users can specify the desired resource
                 bound as templates with numerical holes, e.g., as steps
                 $ \leq $ ? * size(l) + ? in the contracts of functions.
They can also express invariants necessary for
establishing the bounds that may depend on the state of
memoization. Our approach operates in two phases: first
generating an instrumented first-order program that
accurately models the higher-order control flow and the
effects of memoization on resources using sets,
algebraic datatypes and mutual recursion, and then
verifying the contracts of the first-order program by
producing verification conditions of the form $ \exists
\forall $ using an extended assume/guarantee reasoning.
We use our approach to verify precise bounds on
resources such as evaluation steps and number of
heap-allocated objects on 17 challenging data
                 structures and algorithms. Our benchmarks, comprising 5K
                 lines of functional Scala code, include lazy
mergesort, Okasaki's real-time queue and deque data
structures that rely on aliasing of references to
first-class functions; lazy data structures based on
numerical representations such as the conqueue data
structure of Scala's data-parallel library, cyclic
streams, as well as dynamic programming algorithms such
as knapsack and Viterbi. Our evaluations show that when
averaged over all benchmarks the actual runtime
resource consumption is 80\% of the value inferred by
our tool when estimating the number of evaluation
steps, and is 88\% for the number of heap-allocated
objects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
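The contract templates in this abstract instantiate to concrete
linear bounds such as steps <= 3*n + 2. The sketch below (all names
ours) shows the dynamic reading of such a contract on a memoized
function; the paper's contribution is to discharge the assertion
statically, including the memo table's effect on cost.

    let steps = ref 0
    let tick () = incr steps

    let table : (int, int) Hashtbl.t = Hashtbl.create 64

    (* memoized Fibonacci; one tick per call *)
    let rec fib n =
      tick ();
      match Hashtbl.find_opt table n with
      | Some v -> v
      | None ->
        let v = if n < 2 then n else fib (n - 1) + fib (n - 2) in
        Hashtbl.add table n v;
        v

    let fib_checked n =
      steps := 0;
      let v = fib n in
      (* contract template with the holes instantiated: thanks to
         memoization, at most 2n-1 calls occur, so 3n+2 holds *)
      assert (!steps <= 3 * n + 2);
      v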
@Article{Zhang:2017:CSD,
author = "Qirun Zhang and Zhendong Su",
title = "Context-sensitive data-dependence analysis via linear
conjunctive language reachability",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "344--358",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009848",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many program analysis problems can be formulated as
graph reachability problems. In the literature,
context-free language (CFL) reachability has been the
most popular formulation and can be computed in
subcubic time. The context-sensitive data-dependence
analysis is a fundamental abstraction that can express
a broad range of program analysis problems. It
essentially describes an interleaved
matched-parenthesis language reachability problem. The
language is not context-free, and the problem is
well-known to be undecidable. In practice, many program
analyses adopt CFL-reachability to exactly model the
matched parentheses for either context-sensitivity or
structure-transmitted data-dependence, but not both.
Thus, the CFL-reachability formulation for
context-sensitive data-dependence analysis is
inherently an approximation. To support more precise
and scalable analyses, this paper introduces linear
conjunctive language (LCL) reachability, a new,
expressive class of graph reachability. LCL not only
contains the interleaved matched-parenthesis language,
but is also closed under all set-theoretic operations.
                 Given a graph with $n$ nodes and $m$ edges, we propose an
                 $ O(m n) $-time approximation algorithm for solving
all-pairs LCL-reachability, which is asymptotically
better than known CFL-reachability algorithms. Our
formulation and algorithm offer a new perspective on
attacking the aforementioned undecidable problem ---
the LCL-reachability formulation is exact, while the
LCL-reachability algorithm yields a sound
approximation. We have applied the LCL-reachability
framework to two existing client analyses. The
experimental results show that the LCL-reachability
framework is both more precise and scalable than the
traditional CFL-reachability framework. This paper
opens up the opportunity to exploit LCL-reachability in
program analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
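The CFL-reachability baseline that LCL-reachability improves on is
easy to sketch for a single Dyck language, with grammar
M -> eps | (k M )k | M M: a worklist saturates the set of M-labelled
summary edges. The naive OCaml version below is ours and cubic or
worse; the paper's point is that the interleaved-Dyck problem needs a
richer language class, for which it gives an O(mn) approximation.

    module P = Set.Make (struct type t = int * int let compare = compare end)

    (* opens/closes: (src, dst, bracket-kind) labelled edges *)
    let dyck_reach ~nodes ~opens ~closes =
      let m = ref P.empty in
      let add (u, v) work =
        if P.mem (u, v) !m then work
        else (m := P.add (u, v) !m; (u, v) :: work)
      in
      let work = ref [] in
      for u = 0 to nodes - 1 do work := add (u, u) !work done;
      while !work <> [] do
        match !work with
        | [] -> ()
        | (v, w) :: rest ->
          work := rest;
          (* rule M M -> M *)
          P.iter (fun (a, b) ->
            if b = v then work := add (a, w) !work;
            if a = w then work := add (v, b) !work) !m;
          (* rule (k M )k -> M *)
          List.iter (fun (u, v', k) ->
            if v' = v then
              List.iter (fun (w', x, k') ->
                if w' = w && k = k' then work := add (u, x) !work)
                closes)
            opens
      done;
      !m

    let () =
      (* 0 -(1-> 1 -)1-> 2 : node 0 reaches 2 with matched brackets *)
      let m = dyck_reach ~nodes:3 ~opens:[ (0, 1, 1) ] ~closes:[ (1, 2, 1) ] in
      assert (P.mem (0, 2) m)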
@Article{Hoffmann:2017:TAR,
author = "Jan Hoffmann and Ankush Das and Shu-Chun Weng",
title = "Towards automatic resource bound analysis for
{OCaml}",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "359--373",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009842",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This article presents a resource analysis system for
OCaml programs. The system automatically derives
worst-case resource bounds for higher-order polymorphic
programs with user-defined inductive types. The
technique is parametric in the resource and can derive
bounds for time, memory allocations and energy usage.
The derived bounds are multivariate resource
polynomials which are functions of different size
parameters that depend on the standard OCaml types.
Bound inference is fully automatic and reduced to a
linear optimization problem that is passed to an
off-the-shelf LP solver. Technically, the analysis
system is based on a novel multivariate automatic
amortized resource analysis (AARA). It builds on
existing work on linear AARA for higher-order programs
with user-defined inductive types and on multivariate
AARA for first-order programs with built-in lists and
                 binary trees. This is the first amortized analysis that
                 automatically derives polynomial bounds for
higher-order functions and polynomial bounds that
depend on user-defined inductive types. Moreover, the
analysis handles a limited form of side effects and
even outperforms the linear bound inference of previous
systems. At the same time, it preserves the
expressivity and efficiency of existing AARA
techniques. The practicality of the analysis system is
demonstrated with an implementation and integration
with Inria's OCaml compiler. The implementation is used
to automatically derive resource bounds for 411
functions and 6018 lines of code derived from OCaml
libraries, the CompCert compiler, and implementations
of textbook algorithms. In a case study, the system
infers bounds on the number of queries that are sent by
OCaml programs to DynamoDB, a commercial NoSQL cloud
database service.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
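Amortized resource analysis assigns potential to data: for the
function below, one unit of potential per cons cell of the first
argument pays for every tick, yielding the tight linear bound. The
sketch is ours (not the tool's output format) and merely checks
dynamically the bound the analysis would derive statically.

    let steps = ref 0
    let tick () = incr steps

    (* cost model: one tick per recursive step *)
    let rec append xs ys =
      match xs with
      | [] -> ys
      | x :: rest -> tick (); x :: append rest ys

    let () =
      steps := 0;
      ignore (append [1; 2; 3] [4; 5]);
      (* AARA would infer: cost(append xs ys) = |xs| *)
      assert (!steps = List.length [1; 2; 3])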
@Article{Scherer:2017:DES,
author = "Gabriel Scherer",
title = "Deciding equivalence with sums and the empty type",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "374--386",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009901",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The logical technique of focusing can be applied to
the $ \lambda $ -calculus; in a simple type system with
atomic types and negative type formers (functions,
products, the unit type), its normal forms coincide
                 with $ \beta \eta $-normal forms. Introducing a saturation
                 phase gives a notion of quasi-normal forms in the presence
                 of positive types (sum types and the empty type). This
                 rich structure lets us prove the decidability of $ \beta
                 \eta $-equivalence in the presence of the empty type, the
                 fact that it coincides with contextual equivalence, and
                 with set-theoretic equality in all finite models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Ilik:2017:ELN,
author = "Danko Ilik",
title = "The exp--log normal form of types: decomposing
extensional equality and representing terms compactly",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "387--399",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009841",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Lambda calculi with algebraic data types lie at the
core of functional programming languages and proof
assistants, but conceal at least two fundamental
theoretical problems already in the presence of the
simplest non-trivial data type, the sum type. First, we
do not know of an explicit and implemented algorithm
for deciding the beta-eta-equality of terms---and this
in spite of the first decidability results proven two
decades ago. Second, it is not clear how to decide when
two types are essentially the same, i.e. isomorphic, in
spite of the meta-theoretic results on decidability of
the isomorphism. In this paper, we present the exp-log
normal form of types---derived from the representation
of exponential polynomials via the unary exponential
and logarithmic functions---that any type built from
arrows, products, and sums, can be isomorphically
mapped to. The type normal form can be used as a simple
heuristic for deciding type isomorphism, thanks to the
fact that it is a systematic application of the
high-school identities. We then show that the type
                 normal form allows us to reduce the standard beta-eta
equational theory of the lambda calculus to a
specialized version of itself, while preserving
completeness of the equality on terms. We end by
describing an alternative representation of normal
terms of the lambda calculus with sums, together with a
Coq-implemented converter into/from our new term
calculus. The difference with the only other previously
implemented heuristic for deciding interesting
instances of eta-equality by Balat, Di Cosmo, and
                 Fiore, is that we exploit the type information of terms
                 substantially, and this often allows us to obtain a
                 canonical representation of terms without performing
                 sophisticated term analyses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
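The high-school identities mentioned here are type isomorphisms such
as a -> (b * c) being isomorphic to (a -> b) * (a -> c), and
(a + b) -> c to (a -> c) * (b -> c). A toy normalizer, our own
simplification with no claim to match the paper's exact normal form,
orients these identities as rewrite rules:

    type ty =
      | Atom of string
      | Arrow of ty * ty      (* exponentiation: b -> c is c^b *)
      | Prod of ty * ty
      | Sum of ty * ty

    (* Push arrows and products through sums/products; re-normalize
       when a child changed, since that can expose a new redex. *)
    let rec norm t =
      match t with
      | Atom _ -> t
      | Arrow (a, Prod (b, c)) -> norm (Prod (Arrow (a, b), Arrow (a, c)))
      | Arrow (Sum (a, b), c) -> norm (Prod (Arrow (a, c), Arrow (b, c)))
      | Arrow (a, Arrow (b, c)) -> norm (Arrow (Prod (a, b), c))
      | Prod (Sum (a, b), c) -> norm (Sum (Prod (a, c), Prod (b, c)))
      | Prod (a, Sum (b, c)) -> norm (Sum (Prod (a, b), Prod (a, c)))
      | Arrow (a, b) ->
        let a', b' = norm a, norm b in
        if a' = a && b' = b then Arrow (a', b') else norm (Arrow (a', b'))
      | Prod (a, b) ->
        let a', b' = norm a, norm b in
        if a' = a && b' = b then Prod (a', b') else norm (Prod (a', b'))
      | Sum (a, b) -> Sum (norm a, norm b)

    let () =
      (* (a + b) -> c normalizes to (a -> c) * (b -> c) *)
      assert (norm (Arrow (Sum (Atom "a", Atom "b"), Atom "c"))
              = Prod (Arrow (Atom "a", Atom "c"),
                      Arrow (Atom "b", Atom "c")))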
@Article{Levy:2017:CI,
author = "Paul Blain Levy",
title = "Contextual isomorphisms",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "400--414",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009898",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "What is the right notion of ``isomorphism'' between
types, in a simple type theory? The traditional answer
is: a pair of terms that are inverse up to a specified
congruence. We firstly argue that, in the presence of
effects, this answer is too liberal and needs to be
                 restricted, using F{\"u}hrmann's notion of
thunkability in the case of value types (as in
call-by-value), or using Munch-Maccagnoni's notion of
linearity in the case of computation types (as in
call-by-name). Yet that leaves us with different
notions of isomorphism for different kinds of type.
This situation is resolved by means of a new notion of
``contextual'' isomorphism (or morphism), analogous at
the level of types to contextual equivalence of terms.
A contextual morphism is a way of replacing one type
with the other wherever it may occur in a judgement, in
a way that is preserved by the action of any term with
holes. For types of pure $ \lambda $-calculus, we show
that a contextual morphism corresponds to a traditional
isomorphism. For value types, a contextual morphism
corresponds to a thunkable isomorphism, and for
computation types, to a linear isomorphism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Brown:2017:TSE,
author = "Matt Brown and Jens Palsberg",
title = "Typed self-evaluation via intensional type functions",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "415--428",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009853",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many popular languages have a self-interpreter, that
is, an interpreter for the language written in itself.
So far, work on polymorphically-typed self-interpreters
has concentrated on self-recognizers that merely
recover a program from its representation. A larger and
until now unsolved challenge is to implement a
polymorphically-typed self-evaluator that evaluates the
represented program and produces a representation of
the result. We present F$_\omega^{\mu i}$, the first $
\lambda $-calculus that supports a
polymorphically-typed self-evaluator. Our calculus
extends F$_\omega $ with recursive types and
intensional type functions and has decidable type
checking. Our key innovation is a novel implementation
of type equality proofs that enables us to define a
versatile representation of programs. Our results
establish a new category of languages that can support
polymorphically-typed self-evaluators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Flur:2017:MSC,
author = "Shaked Flur and Susmit Sarkar and Christopher Pulte
and Kyndylan Nienhuis and Luc Maranget and Kathryn E.
Gray and Ali Sezgin and Mark Batty and Peter Sewell",
title = "Mixed-size concurrency: {ARM}, {POWER}, {C\slash
C++11}, and {SC}",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "429--442",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009839",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Previous work on the semantics of relaxed
shared-memory concurrency has only considered the case
in which each load reads the data of exactly one store.
In practice, however, multiprocessors support
mixed-size accesses, and these are used by systems
software and (to some degree) exposed at the C/C++
language level. A semantic foundation for software,
therefore, has to address them. We investigate the
mixed-size behaviour of ARMv8 and IBM POWER
architectures and implementations: by experiment, by
developing semantic models, by testing the
correspondence between these, and by discussion with
ARM and IBM staff. This turns out to be surprisingly
subtle, and on the way we have to revisit the
fundamental concepts of coherence and sequential
consistency, which change in this setting. In
particular, we show that adding a memory barrier
between each instruction does not restore sequential
consistency. We go on to extend the C/C++11 model to
support non-atomic mixed-size memory accesses. This is
a necessary step towards semantics for real-world
shared-memory concurrent code, beyond litmus tests.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Lidbury:2017:DRD,
author = "Christopher Lidbury and Alastair F. Donaldson",
title = "Dynamic race detection for {C++11}",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "443--457",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009857",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The intricate rules for memory ordering and
synchronisation associated with the C/C++11 memory
model mean that data races can be difficult to
eliminate from concurrent programs. Dynamic data race
analysis can pinpoint races in large and complex
applications, but the state-of-the-art ThreadSanitizer
(tsan) tool for C/C++ considers only sequentially
consistent program executions, and does not correctly
model synchronisation between C/C++11 atomic
operations. We present a scalable dynamic data race
analysis for C/C++11 that correctly captures C/C++11
synchronisation, and uses instrumentation to support
                 exploration of a class of non-sequentially consistent
executions. We concisely define the memory model
fragment captured by our instrumentation via a
restricted axiomatic semantics, and show that the
axiomatic semantics permits exactly those executions
explored by our instrumentation. We have implemented
our analysis in tsan, and evaluate its effectiveness on
benchmark programs, enabling a comparison with the
CDSChecker tool, and on two large and highly concurrent
applications: the Firefox and Chromium web browsers.
Our results show that our method can detect races that
are beyond the scope of the original tsan tool, and
that the overhead associated with applying our enhanced
instrumentation to large applications is tolerable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
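At the core of any happens-before race detector sits a vector-clock
order: two accesses race when neither clock dominates the other. The
sketch below (ours) shows only that core; tsan's shadow-memory
instrumentation and the C/C++11 release/acquire subtleties the paper
captures sit far above it.

    module VC = struct
      type t = int array                       (* one slot per thread *)
      let create n = Array.make n 0
      let tick c t = c.(t) <- c.(t) + 1
      let join a b = Array.mapi (fun i x -> max x b.(i)) a
      let leq a b =
        let ok = ref true in
        Array.iteri (fun i x -> if x > b.(i) then ok := false) a;
        !ok
    end

    (* accesses race iff their clocks are incomparable *)
    let races a b = not (VC.leq a b) && not (VC.leq b a)

    let () =
      let t0 = VC.create 2 and t1 = VC.create 2 in
      VC.tick t0 0;                 (* thread 0 writes x *)
      VC.tick t1 1;                 (* thread 1 writes x, no sync *)
      assert (races t0 t1);
      let t1' = VC.join t1 t0 in    (* t1 acquires t0's release *)
      assert (not (races t0 t1'))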
@Article{Brutschy:2017:SEC,
author = "Lucas Brutschy and Dimitar Dimitrov and Peter
M{\"u}ller and Martin Vechev",
title = "Serializability for eventual consistency: criterion,
analysis, and applications",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "458--472",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009895",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Developing and reasoning about systems using
eventually consistent data stores is a difficult
challenge due to the presence of unexpected behaviors
that do not occur under sequential consistency. A
fundamental problem in this setting is to identify a
correctness criterion that precisely captures intended
application behaviors yet is generic enough to be
applicable to a wide range of applications. In this
paper, we present such a criterion. More precisely, we
generalize conflict serializability to the setting of
eventual consistency. Our generalization is based on a
novel dependency model that incorporates two powerful
algebraic properties: commutativity and absorption.
These properties enable precise reasoning about
programs that employ high-level replicated data types,
common in modern systems. To apply our criterion in
practice, we also developed a dynamic analysis
algorithm and a tool that checks whether a given
program execution is serializable. We performed a
thorough experimental evaluation on two real-world use
cases: debugging cloud-backed mobile applications and
implementing clients of a popular eventually consistent
key-value store. Our experimental results indicate that
our criterion reveals harmful synchronization problems
in applications, is more effective at finding them than
prior approaches, and can be used for the development
of practical, eventually consistent applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Hoenicke:2017:TMM,
author = "Jochen Hoenicke and Rupak Majumdar and Andreas
Podelski",
title = "Thread modularity at many levels: a pearl in
compositional verification",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "473--485",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009893",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A thread-modular proof for the correctness of a
concurrent program is based on an inductive and
interference-free annotation of each thread. It is
well-known that the corresponding proof system is not
complete (unless one adds auxiliary variables). We
describe a hierarchy of proof systems where each level
k corresponds to a generalized notion of thread
modularity (level 1 corresponds to the original
notion). Each level is strictly more expressive than
the previous. Further, each level precisely captures
programs that can be proved using uniform Ashcroft
invariants with k universal quantifiers. We demonstrate
the usefulness of the hierarchy by giving a
compositional proof of the Mach shootdown algorithm for
                 TLB consistency. We give a proof at level 2 that the
                 algorithm is correct for an arbitrary number of CPUs.
                 However, there is no proof for the algorithm at level 1
                 that does not involve auxiliary state.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Leijen:2017:TDC,
author = "Daan Leijen",
title = "Type directed compilation of row-typed algebraic
effects",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "486--499",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009872",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Algebraic effect handlers, introduced by Plotkin and
Power in 2002, are recently gaining in popularity as a
purely functional approach to modeling effects. In this
article, we give a full overview of practical algebraic
effects in the context of a compiled implementation in
the Koka language. In particular, we show how algebraic
effects generalize over common constructs like
exception handling, state, iterators and async-await.
We give an effective type inference algorithm based on
extensible effect rows using scoped labels, and a
direct operational semantics. Finally, we show an
efficient compilation scheme to common runtime
platforms (like JavaScript) using a type directed
selective CPS translation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
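The programming model being compiled can be demonstrated in OCaml 5,
whose native effect handlers are an untyped cousin of Koka's
row-typed effects. This is a model of the source language only,
assuming OCaml >= 5.0; Koka's type-directed selective CPS compilation
is the paper's actual subject.

    open Effect
    open Effect.Deep

    type _ Effect.t += Get : int Effect.t
                     | Put : int -> unit Effect.t

    (* a handler that interprets Get/Put, generalizing both
       exception handling (discard k) and state (resume k) *)
    let run_state (init : int) (body : unit -> 'a) : 'a =
      let state = ref init in
      try_with body ()
        { effc = fun (type b) (eff : b Effect.t) ->
            match eff with
            | Get -> Some (fun (k : (b, _) continuation) ->
                continue k !state)
            | Put v -> Some (fun (k : (b, _) continuation) ->
                state := v; continue k ())
            | _ -> None }

    let () =
      let r = run_state 1 (fun () ->
          perform (Put (perform Get + 41));
          perform Get)
      in
      assert (r = 42)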
@Article{Lindley:2017:DDD,
author = "Sam Lindley and Conor McBride and Craig McLaughlin",
title = "Do be do be do",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "500--514",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009897",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We explore the design and implementation of Frank, a
strict functional programming language with a
bidirectional effect type system designed from the
ground up around a novel variant of Plotkin and
Pretnar's effect handler abstraction. Effect handlers
provide an abstraction for modular effectful
programming: a handler acts as an interpreter for a
collection of commands whose interfaces are statically
tracked by the type system. However, Frank eliminates
the need for an additional effect handling construct by
generalising the basic mechanism of functional
abstraction itself. A function is simply the special
case of a Frank operator that interprets no commands.
Moreover, Frank's operators can be multihandlers which
simultaneously interpret commands from several sources
at once, without disturbing the direct style of
functional programming with values. Effect typing in
Frank employs a novel form of effect polymorphism which
                 avoids mentioning effect variables in source code. This
is achieved by propagating an ambient ability inwards,
rather than accumulating unions of potential effects
outwards. We introduce Frank by example, and then give
a formal account of the Frank type system and its
semantics. We introduce Core Frank by elaborating Frank
operators into functions, case expressions, and unary
handlers, and then give a sound small-step operational
semantics for Core Frank. Programming with effects and
handlers is in its infancy. We contribute an
exploration of future possibilities, particularly in
combination with other forms of rich type system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Ahman:2017:DMF,
author = "Danel Ahman and Catalin Hritcu and Kenji Maillard and
Guido Mart{\'\i}nez and Gordon Plotkin and Jonathan
Protzenko and Aseem Rastogi and Nikhil Swamy",
title = "{Dijkstra} monads for free",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "515--529",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009878",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dijkstra monads enable a dependent type theory to be
enhanced with support for specifying and verifying
effectful code via weakest preconditions. Together with
their closely related counterparts, Hoare monads, they
provide the basis on which verification tools like F*,
Hoare Type Theory (HTT), and Ynot are built. We show
that Dijkstra monads can be derived ``for free'' by
applying a continuation-passing style (CPS) translation
to the standard monadic definitions of the underlying
computational effects. Automatically deriving Dijkstra
monads in this way provides a correct-by-construction
and efficient way of reasoning about user-defined
effects in dependent type theories. We demonstrate
these ideas in EMF*, a new dependently typed calculus,
validating it via both formal proof and a prototype
implementation within F*. Besides equipping F* with a
more uniform and extensible effect system, EMF* enables
a novel mixture of intrinsic and extrinsic proofs
within F*.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Sekiyama:2017:SMC,
author = "Taro Sekiyama and Atsushi Igarashi",
title = "Stateful manifest contracts",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "530--544",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009875",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper studies hybrid contract verification for an
imperative higher-order language based on a so-called
manifest contract system. In manifest contract systems,
contracts are part of static types and contract
verification is hybrid in the sense that some contracts
are statically verified, typically by subtyping, but
others are dynamically by casts. It is, however, not
trivial to extend existing manifest contract systems,
which have been designed mostly for pure functional
languages, to imperative features, mainly because of
the lack of flow-sensitivity, which should be taken
into account in verifying imperative programs
statically. We develop an imperative higher-order
manifest contract system $ \lambda_{\rm ref}^H $ for
flow-sensitive hybrid contract verification. We
                 introduce a computational variant of Nanevski et al.'s
Hoare types, which are flow-sensitive types to
represent pre- and postconditions of impure
computation. Our Hoare types are computational in the
sense that pre- and postconditions are given by
Booleans in the same language as programs so that they
are dynamically verifiable. $ \lambda_{\rm ref}^H $
also supports refinement types as in existing manifest
contract systems to describe flow-insensitive,
state-independent contracts of pure computation. While
it is desirable that any --- possibly
state-manipulating --- predicate can be used in
contracts, abuse of stateful operations will break the
system. To control stateful operations in contracts, we
introduce a region-based effect system, which allows
contracts in refinement types and computational Hoare
types to manipulate states, as long as they are
observationally pure and read-only, respectively. We
show that dynamic contract checking in our calculus is
consistent with static typing in the sense that the
final result obtained without dynamic contract
violations satisfies contracts in its static type. It
in particular means that the state after stateful
computations satisfies their postconditions. As in some
of prior manifest contract systems, static contract
verification in this work is ``post facto,'' that is,
we first define our manifest contract system so that
all contracts are checked at run time, formalize
conditions when dynamic checks can be removed safely,
and show that programs with and without such removable
checks are contextually equivalent. We also apply the
idea of post facto verification to region-based local
reasoning, inspired by the frame rule of Separation
Logic.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
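The dynamic half of a manifest contract is a cast into a refinement
type that checks the predicate at run time and blames a label on
failure, as in the sketch below (ours). The paper's work is on the
static half for stateful code: an effect system ensuring that
contract predicates touching the heap stay observationally pure.

    exception Blame of string

    type 'a refinement = { pred : 'a -> bool; label : string }

    (* cast from 'a into {x:'a | pred x}: check now, blame on failure *)
    let cast (r : 'a refinement) (v : 'a) : 'a =
      if r.pred v then v else raise (Blame r.label)

    let pos = { pred = (fun x -> x > 0); label = "pos" }

    let safe_div x y = x / cast pos y    (* contract: divisor positive *)

    let () =
      assert (safe_div 10 2 = 5);
      match safe_div 1 0 with
      | _ -> assert false
      | exception Blame l -> Printf.printf "blame %s\n" l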
@Article{deAmorim:2017:SAM,
author = "Arthur Azevedo de Amorim and Marco Gaboardi and Justin
Hsu and Shin-ya Katsumata and Ikram Cherigui",
title = "A semantic account of metric preservation",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "545--556",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009890",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Program sensitivity measures how robust a program is
to small changes in its input, and is a fundamental
notion in domains ranging from differential privacy to
cyber-physical systems. A natural way to formalize
program sensitivity is in terms of metrics on the input
                 and output spaces, requiring that an $r$-sensitive
                 function map inputs that are at distance $d$ to outputs
                 that are at distance at most $ r \cdot d $.
Program sensitivity is thus an analogue of Lipschitz
continuity for programs. Reed and Pierce introduced
Fuzz, a functional language with a linear type system
that can express program sensitivity. They show
soundness operationally, in the form of a metric
preservation property. Inspired by their work, we study
program sensitivity and metric preservation from a
denotational point of view. In particular, we introduce
metric CPOs, a novel semantic structure for reasoning
about computation on metric spaces, by endowing CPOs
with a compatible notion of distance. This structure is
useful for reasoning about metric properties of
programs, and specifically about program sensitivity.
We demonstrate metric CPOs by giving a model for the
deterministic fragment of Fuzz.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
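Sensitivity is Lipschitz continuity: f is r-sensitive when
d(f x, f y) <= r * d(x, y) for all inputs. The sketch below is ours,
unrelated to Fuzz's linear types or the paper's metric CPOs; it
merely probes that inequality numerically for a 1-sensitive function.

    let d x y = abs_float (x -. y)

    (* random testing of a claimed sensitivity bound r *)
    let check_sensitivity ~r f trials =
      let ok = ref true in
      for _ = 1 to trials do
        let x = Random.float 100. and y = Random.float 100. in
        if d (f x) (f y) > r *. d x y +. 1e-9 then ok := false
      done;
      !ok

    let clip x = min 1.0 (max 0.0 x)    (* clipping is 1-sensitive *)

    let () = assert (check_sensitivity ~r:1.0 clip 10_000)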
@Article{Smolka:2017:CMS,
author = "Steffen Smolka and Praveen Kumar and Nate Foster and
Dexter Kozen and Alexandra Silva",
title = "{Cantor} meets {Scott}: semantic foundations for
probabilistic networks",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "557--571",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009843",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "ProbNetKAT is a probabilistic extension of NetKAT with
a denotational semantics based on Markov kernels. The
language is expressive enough to generate continuous
distributions, which raises the question of how to
compute effectively in the language. This paper gives
                 a new characterization of ProbNetKAT's semantics using
domain theory, which provides the foundation needed to
build a practical implementation. We show how to use
the semantics to approximate the behavior of arbitrary
ProbNetKAT programs using distributions with finite
support. We develop a prototype implementation and show
how to use it to solve a variety of problems including
characterizing the expected congestion induced by
different routing schemes and reasoning
probabilistically about reachability in a network.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Subramanian:2017:GSF,
author = "Kausik Subramanian and Loris D'Antoni and Aditya
Akella",
title = "{Genesis}: synthesizing forwarding tables in
multi-tenant networks",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "572--585",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009845",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Operators in multi-tenant cloud datacenters require
support for diverse and complex end-to-end policies,
such as, reachability, middlebox traversals, isolation,
traffic engineering, and network resource management.
We present Genesis, a datacenter network management
system which allows policies to be specified in a
declarative manner without explicitly programming the
network data plane. Genesis tackles the problem of
enforcing policies by synthesizing switch forwarding
tables. It uses the formal foundations of constraint
solving in combination with fast off-the-shelf SMT
solvers. To improve synthesis performance, Genesis
incorporates a novel search strategy that uses regular
expressions to specify properties that leverage the
structure of datacenter networks, and a
divide-and-conquer synthesis procedure which exploits
the structure of policy relationships. We have
prototyped Genesis, and conducted experiments with a
variety of workloads on real-world topologies to
demonstrate its performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Kopczynski:2017:LSS,
author = "Eryk Kopczy{\'n}ski and Szymon Toru{\'n}czyk",
title = "{LOIS}: syntax and semantics",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "586--598",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009876",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the semantics of an imperative programming
language called LOIS (Looping Over Infinite Sets),
which allows iterating through certain infinite sets,
                 in finite time. Our semantics intuitively corresponds to
                 the execution of infinitely many threads in parallel. This
                 makes it possible to merge the power of abstract
                 mathematical constructions into imperative programming.
                 Infinite
sets are internally represented using first order
formulas over some underlying logical structure, and
SMT solvers are employed to evaluate programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Feng:2017:CBSa,
author = "Yu Feng and Ruben Martins and Yuepeng Wang and Isil
Dillig and Thomas W. Reps",
title = "Component-based synthesis for complex {APIs}",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "599--612",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009851",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Component-based approaches to program synthesis
assemble programs from a database of existing
components, such as methods provided by an API. In this
paper, we present a novel type-directed algorithm for
component-based synthesis. The key novelty of our
approach is the use of a compact Petri-net
representation to model relationships between methods
in an API. Given a target method signature S, our
approach performs reachability analysis on the
underlying Petri-net model to identify sequences of
method calls that could be used to synthesize an
implementation of S. The programs synthesized by our
algorithm are guaranteed to type check and pass all
test cases provided by the user. We have implemented
this approach in a tool called SyPet, and used it to
successfully synthesize real-world programming tasks
extracted from on-line forums and existing code
repositories. We also compare SyPet with two
state-of-the-art synthesis tools, namely InSynth and
CodeHint, and demonstrate that SyPet can synthesize
more programs in less time. Finally, we compare our
approach with an alternative solution based on
hypergraphs and demonstrate its advantages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
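The encoding described here is direct: each type is a Petri-net
place, each API method a transition consuming its argument types and
producing its return type, and a synthesizable call sequence is a
firing sequence reaching the goal marking. The sketch below is ours,
with a made-up two-method "API" and a plain breadth-first search
(assuming a bounded net); SyPet's actual search is far more
engineered.

    module M = Map.Make (String)        (* marking: place -> tokens *)

    type transition = { name : string;
                        pre : (string * int) list;
                        post : (string * int) list }

    let get m p = try M.find p m with Not_found -> 0
    let enabled m t = List.for_all (fun (p, n) -> get m p >= n) t.pre
    let fire m t =
      let m =
        List.fold_left (fun m (p, n) -> M.add p (get m p - n) m) m t.pre in
      List.fold_left (fun m (p, n) -> M.add p (get m p + n) m) m t.post

    (* breadth-first search for a firing sequence reaching goal *)
    let reach ~start ~goal ~ts =
      let seen = Hashtbl.create 64 in
      let q = Queue.create () in
      Queue.add (start, []) q;
      let rec loop () =
        if Queue.is_empty q then None
        else
          let m, path = Queue.pop q in
          if List.for_all (fun (p, n) -> get m p >= n) goal then
            Some (List.rev path)
          else begin
            List.iter (fun t ->
              if enabled m t then begin
                let m' = fire m t in
                let key = M.bindings m' in
                if not (Hashtbl.mem seen key) then begin
                  Hashtbl.add seen key ();
                  Queue.add (m', t.name :: path) q
                end
              end) ts;
            loop ()
          end
      in
      loop ()

    let () =
      let parse = { name = "parse"; pre = ["string", 1]; post = ["ast", 1] }
      and eval  = { name = "eval";  pre = ["ast", 1];    post = ["int", 1] } in
      match reach ~start:(M.singleton "string" 1) ~goal:["int", 1]
              ~ts:[parse; eval] with
      | Some calls -> List.iter print_endline calls   (* parse; eval *)
      | None -> ()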
@Article{Moerman:2017:LNA,
author = "Joshua Moerman and Matteo Sammartino and Alexandra
Silva and Bartek Klin and Michal Szynwelski",
title = "Learning nominal automata",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "613--625",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009879",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present an Angluin-style algorithm to learn nominal
automata, which are acceptors of languages over
infinite (structured) alphabets. The abstract approach
we take allows us to seamlessly extend known variations
of the algorithm to this new setting. In particular we
can learn a subclass of nominal non-deterministic
automata. An implementation using a recently developed
Haskell library for nominal computation is provided for
preliminary experiments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Bouajjani:2017:VCC,
author = "Ahmed Bouajjani and Constantin Enea and Rachid
Guerraoui and Jad Hamza",
title = "On verifying causal consistency",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "626--638",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009888",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Causal consistency is one of the most adopted
consistency criteria for distributed implementations of
data structures. It ensures that operations are
executed at all sites according to their causal
precedence. We address the issue of verifying
automatically whether the executions of an
implementation of a data structure are causally
consistent. We consider two problems: (1) checking
whether one single execution is causally consistent,
which is relevant for developing testing and bug
finding algorithms, and (2) verifying whether all the
executions of an implementation are causally
consistent. We show that the first problem is
NP-complete. This holds even for the read-write memory
abstraction, which is a building block of many modern
distributed systems. Indeed, such systems often store
data in key-value stores, which are instances of the
read-write memory abstraction. Moreover, we prove that,
surprisingly, the second problem is undecidable, and
again this holds even for the read-write memory
abstraction. However, we show that for the read-write
memory abstraction, these negative results can be
circumvented if the implementations are data
independent, i.e., their behaviors do not depend on the
data values that are written or read at each moment,
which is a realistic assumption. We prove that for data
independent implementations, the problem of checking
the correctness of a single execution w.r.t. the
read-write memory abstraction is polynomial time.
Furthermore, we show that for such implementations the
set of non-causally consistent executions can be
represented by means of a finite number of register
automata. Using these machines as observers (in
                 parallel with the implementation) allows a polynomial
                 reduction of the problem of checking causal consistency
to a state reachability problem. This reduction holds
regardless of the class of programs used for the
implementation, of the number of read-write variables,
and of the used data domain. It allows leveraging
existing techniques for assertion/reachability checking
to causal consistency verification. Moreover, for a
significant class of implementations, we derive from
this reduction the decidability of verifying causal
consistency w.r.t. the read-write memory abstraction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Srikanth:2017:CVU,
author = "Akhilesh Srikanth and Burak Sahin and William R.
Harris",
title = "Complexity verification using guided theorem
enumeration",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "639--652",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009864",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Determining if a given program satisfies a given bound
on the amount of resources that it may use is a
fundamental problem with critical practical
applications. Conventional automatic verifiers for
safety properties cannot be applied to address this
problem directly because such verifiers target
properties expressed in decidable theories; however,
many practical bounds are expressed in nonlinear
theories, which are undecidable. In this work, we
introduce an automatic verification algorithm, CAMPY,
that determines if a given program P satisfies a given
resource bound B, which may be expressed using
polynomial, exponential, and logarithmic terms. The key
technical contribution behind our verifier is an
interpolating theorem prover for non-linear theories
that lazily learns a sufficiently accurate
approximation of non-linear theories by selectively
grounding theorems of the nonlinear theory that are
relevant to proving that P satisfies B. To evaluate
CAMPY, we implemented it to target Java Virtual Machine
bytecode. We applied CAMPY to verify that over 20
solutions submitted for programming problems hosted on
popular online coding platforms satisfy or do not
satisfy expected complexity bounds.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Dudenhefner:2017:ITC,
author = "Andrej Dudenhefner and Jakob Rehof",
title = "Intersection type calculi of bounded dimension",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "653--665",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009862",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A notion of dimension in intersection typed \lambda
-calculi is presented. The dimension of a typed \lambda
-term is given by the minimal norm of an elaboration (a
proof theoretic decoration) necessary for typing the
term at its type, and, intuitively, measures
intersection introduction as a resource.
Bounded-dimensional intersection type calculi are shown
to enjoy subject reduction, since terms can be
                 elaborated in non-increasing norm under $ \beta $
                 -reduction. We prove that a multiset interpretation
(corresponding to a non-idempotent and non-linear
interpretation of intersection) of dimensionality
corresponds to the number of simultaneous constraints
required during search for inhabitants. As a
consequence, the inhabitation problem is decidable in
bounded multiset dimension, and it is proven to be
EXPSPACE-complete. This result is a substantial
generalization of inhabitation for the rank 2-fragment,
yielding a calculus with decidable inhabitation which
is independent of rank. Our results give rise to a new
criterion (dimensional bound) for subclasses of
intersection type calculi with a decidable inhabitation
problem, which is orthogonal to previously known
criteria, and which should have immediate applications
in synthesis. Additionally, we give examples of
dimensional analysis of fragments of the intersection
type system, including conservativity over simple
types, rank 2-types, and normal form typings, and we
provide some observations towards dimensional analysis
of other systems. It is suggested (for future work)
that our notion of dimension may have semantic
interpretations in terms of reduction complexity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Amin:2017:TSP,
author = "Nada Amin and Tiark Rompf",
title = "Type soundness proofs with definitional interpreters",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "666--679",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009866",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "While type soundness proofs are taught in every
graduate PL class, the gap between realistic languages
and what is accessible to formal proofs is large. In
the case of Scala, it has been shown that its formal
model, the Dependent Object Types (DOT) calculus,
cannot simultaneously support key metatheoretic
properties such as environment narrowing and subtyping
transitivity, which are usually required for a type
soundness proof. Moreover, Scala and many other
realistic languages lack a general substitution
property. The first contribution of this paper is to
demonstrate how type soundness proofs for advanced,
polymorphic, type systems can be carried out with an
operational semantics based on high-level, definitional
interpreters, implemented in Coq. We present the first
mechanized soundness proofs in this style for System F
and several extensions, including mutable references.
Our proofs use only straightforward induction, which is
significant, as the combination of big-step semantics,
mutable references, and polymorphism is commonly
believed to require coinductive proof techniques. The
second main contribution of this paper is to show how
DOT-like calculi emerge from straightforward
generalizations of the operational aspects of F,
exposing a rich design space of calculi with
                 path-dependent types in between System F and DOT, which
we dub the System D Square. By working directly on the
target language, definitional interpreters can focus
the design space and expose the invariants that
actually matter at runtime. Looking at such runtime
invariants is an exciting new avenue for type system
design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
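The proof recipe scales down to a few lines: write the interpreter as
a total function with a fuel parameter, and type soundness becomes "a
well-typed term never returns Stuck, for any fuel", provable by
ordinary induction on fuel. A toy instance (ours; the paper does
System F and DOT in Coq):

    type tm = Var of int | Lam of tm | App of tm * tm | Lit of int

    type vl = VInt of int | VClo of vl list * tm

    type res = Val of vl | Stuck | Timeout

    (* definitional interpreter, total thanks to the fuel counter;
       de Bruijn indices into the environment *)
    let rec eval fuel env t =
      if fuel = 0 then Timeout
      else
        match t with
        | Lit n -> Val (VInt n)
        | Var x -> (match List.nth_opt env x with
                    | Some v -> Val v | None -> Stuck)
        | Lam body -> Val (VClo (env, body))
        | App (f, a) ->
          (match eval (fuel - 1) env f with
           | Val (VClo (env', body)) ->
             (match eval (fuel - 1) env a with
              | Val v -> eval (fuel - 1) (v :: env') body
              | r -> r)
           | Val (VInt _) -> Stuck
           | r -> r)

    let () =
      (* (fun x -> x) 7 *)
      assert (eval 10 [] (App (Lam (Var 0), Lit 7)) = Val (VInt 7))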
@Article{Angiuli:2017:CHD,
author = "Carlo Angiuli and Robert Harper and Todd Wilson",
title = "Computational higher-dimensional type theory",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "680--693",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009861",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Formal constructive type theory has proved to be an
effective language for mechanized proof. By avoiding
non-constructive principles, such as the law of the
excluded middle, type theory admits sharper proofs and
broader interpretations of results. From a computer
science perspective, interest in type theory arises
from its applications to programming languages.
Standard constructive type theories used in
mechanization admit computational interpretations based
on meta-mathematical normalization theorems. These
proofs are notoriously brittle; any change to the
theory potentially invalidates its computational
meaning. As a case in point, Voevodsky's univalence
axiom raises questions about the computational meaning
of proofs. We consider the question: Can
higher-dimensional type theory be construed as a
programming language? We answer this question
affirmatively by providing a direct, deterministic
operational interpretation for a representative
higher-dimensional dependent type theory with higher
inductive types and an instance of univalence. Rather
than being a formal type theory defined by rules, it is
instead a computational type theory in the sense of
Martin-L{\"o}f's meaning explanations and of the NuPRL
semantics. The definition of the type theory starts
with programs; types are specifications of program
behavior. The main result is a canonicity theorem
stating that closed programs of boolean type evaluate
to true or false.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Chang:2017:TSM,
author = "Stephen Chang and Alex Knauth and Ben Greenman",
title = "Type systems as macros",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "694--705",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009886",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Turnstile, a metalanguage for creating
typed embedded languages. To implement the type system,
programmers write type checking rules resembling
traditional judgment syntax. To implement the
semantics, they incorporate elaborations into these
rules. Turnstile critically depends on the idea of
linguistic reuse. It exploits a macro system in a novel
way to simultaneously type check and rewrite a surface
program into a target language. Reusing a macro system
also yields modular implementations whose rules may be
mixed and matched to create other languages. Combined
with typical compiler and runtime reuse, Turnstile
produces performant typed embedded languages with
little effort.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Kumar:2017:PFA,
author = "Ananya Kumar and Guy E. Blelloch and Robert Harper",
title = "Parallel functional arrays",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "706--718",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009869",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The goal of this paper is to develop a form of
functional arrays (sequences) that are as efficient as
imperative arrays, can be used in parallel, and have
well defined cost-semantics. The key idea is to
consider sequences with functional value semantics but
non-functional cost semantics. Because the value
semantics is functional, ``updating'' a sequence
returns a new sequence. We allow operations on
``older'' sequences (called interior sequences) to be
more expensive than operations on the ``most recent''
sequences (called leaf sequences). We embed sequences
in a language supporting fork-join parallelism. Due to
the parallelism, operations can be interleaved
non-deterministically, and, in conjunction with the
different cost for interior and leaf sequences, this
can lead to non-deterministic costs for a program.
Consequently the costs of programs can be difficult to
analyze. The main result is the derivation of a
deterministic cost dynamics which makes analyzing the
costs easier. The theorems are not specific to
sequences and can be applied to other data types with
different costs for operating on interior and leaf
versions. We present a wait-free concurrent
implementation of sequences that requires constant work
for accessing and updating leaf sequences, and
logarithmic work for accessing and linear work for
updating interior sequences. We sketch a proof of
correctness for the sequence implementation. The key
advantages of the present approach compared to current
approaches are that our implementation requires no
changes to existing programming languages, supports
nested parallelism, and has well defined cost
semantics. At the same time, it allows for functional
implementations of algorithms such as depth-first
search with the same asymptotic complexity as
imperative implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
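
An illustrative Python sketch of the leaf/interior distinction described
in the abstract above (a toy reconstruction, not the paper's wait-free
concurrent implementation; all names are hypothetical, and it assumes
single-threaded use with each version updated at most once): ``updating''
returns a new sequence, leaf reads are constant-time, and interior reads
pay extra by scanning a version log.

# Sketch: functional value semantics, non-functional cost semantics.
class FunArray:
    def __init__(self, data, log, version, latest):
        self._data = data        # shared mutable backing store
        self._log = log          # (index, version) -> overwritten value
        self._version = version
        self._latest = latest    # shared one-element cell: leaf version

    @staticmethod
    def of(values):
        return FunArray(list(values), {}, 0, [0])

    def get(self, i):
        if self._version == self._latest[0]:
            return self._data[i]              # leaf read: O(1)
        for v in range(self._version, self._latest[0]):
            if (i, v) in self._log:           # interior read: scan the log
                return self._log[(i, v)]
        return self._data[i]

    def set(self, i, x):
        self._log[(i, self._version)] = self._data[i]
        self._data[i] = x
        self._latest[0] = self._version + 1   # the new array becomes the leaf
        return FunArray(self._data, self._log, self._version + 1, self._latest)

a0 = FunArray.of([0, 0, 0])
a1 = a0.set(1, 42)        # functional semantics: a0 is observably unchanged
assert a0.get(1) == 0 and a1.get(1) == 42
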
@Article{Konnov:2017:SCP,
author = "Igor Konnov and Marijana Lazi{\'c} and Helmut Veith
and Josef Widder",
title = "A short counterexample property for safety and
liveness verification of fault-tolerant distributed
algorithms",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "719--734",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009860",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Distributed algorithms have many mission-critical
applications ranging from embedded systems and
replicated databases to cloud computing. Due to
asynchronous communication, process faults, or network
failures, these algorithms are difficult to design and
verify. Many algorithms achieve fault tolerance by
using threshold guards that, for instance, ensure that
a process waits until it has received an acknowledgment
from a majority of its peers. Consequently,
domain-specific languages for fault-tolerant
distributed systems offer language support for
threshold guards. We introduce an automated method for
model checking of safety and liveness of
threshold-guarded distributed algorithms in systems
where the number of processes and the fraction of
faulty processes are parameters. Our method is based on
a short counterexample property: if a distributed
algorithm violates a temporal specification (in a
fragment of LTL), then there is a counterexample whose
length is bounded and independent of the parameters. We
prove this property by (i) characterizing executions
depending on the structure of the temporal formula, and
(ii) using commutativity of transitions to accelerate
and shorten executions. We extended the ByMC toolset
(Byzantine Model Checker) with our technique, and
verified liveness and safety of 10 prominent
fault-tolerant distributed algorithms, most of which
were out of reach for existing techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
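
A minimal Python sketch of the kind of threshold guard the abstract above
refers to; the concrete quorum formula below is a standard Byzantine
majority chosen for illustration, not a rule taken from the paper, and n
and t are concrete here although the paper treats them as parameters.

def guard_fires(n, t, acks_received):
    """True once more than (n + t) / 2 acks arrived: even if t of them
    came from faulty processes, a majority of correct ones agreed."""
    return acks_received > (n + t) / 2

n, t = 7, 2
assert not guard_fires(n, t, acks_received=4)   # 4 <= (7 + 2) / 2
assert guard_fires(n, t, acks_received=5)       # quorum reached
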
@Article{Liu:2017:ADB,
author = "Xinxin Liu and Tingting Yu and Wenhui Zhang",
title = "Analyzing divergence in bisimulation semantics",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "735--747",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009870",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Some bisimulation based abstract equivalence relations
may equate divergent systems with non-divergent ones,
examples including weak bisimulation equivalence and
branching bisimulation equivalence. Thus extra efforts
are needed to analyze divergence for the compared
systems. In this paper we propose a new method for
analyzing divergence in bisimulation semantics, which
relies only on simple observations of individual
transitions. We show that this method can verify
several typical divergence preserving bisimulation
equivalences including two well-known ones. As an
application case study, we use the proposed method to
verify the HSY collision stack to draw the conclusion
that the stack implementation is correct in terms of
linearizability with a lock-free progress condition.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Lange:2017:FGL,
author = "Julien Lange and Nicholas Ng and Bernardo Toninho and
Nobuko Yoshida",
title = "Fencing off {Go}: liveness and safety for
channel-based programming",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "748--761",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009847",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Go is a production-level statically typed programming
language whose design features explicit message-passing
primitives and lightweight threads, enabling (and
encouraging) programmers to develop concurrent systems
where components interact through communication more so
than by lock-based shared memory concurrency. Go can
only detect global deadlocks at runtime, but provides
no compile-time protection against all-too-common
communication mismatches or partial deadlocks. This
work develops a static verification framework for
bounded liveness and safety in Go programs, able to
detect communication errors and partial deadlocks in a
general class of realistic concurrent programs,
including those with dynamic channel creation and
infinite recursion. Our approach infers from a Go
program a faithful representation of its communication
patterns as a behavioural type. By checking a syntactic
restriction on channel usage, dubbed fencing, we ensure
that programs are made up of finitely many different
communication patterns that may be repeated infinitely
many times. This restriction allows us to implement
bounded verification procedures (akin to bounded model
checking) to check for liveness and safety in types
which in turn approximates liveness and safety in Go
programs. We have implemented type inference and
liveness and safety checks in a tool-chain and tested
it against publicly available Go programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Vitousek:2017:BTL,
author = "Michael M. Vitousek and Cameron Swords and Jeremy G.
Siek",
title = "Big types in little runtime: open-world soundness and
collaborative blame for gradual type systems",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "762--774",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009849",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Gradual typing combines static and dynamic typing in
the same language, offering programmers the error
detection and strong guarantees of static types and the
rapid prototyping and flexible programming idioms of
dynamic types. Many gradually typed languages are
implemented by translation into an untyped target
language (e.g., Typed Clojure, TypeScript, Gradualtalk,
and Reticulated Python). For such languages, it is
desirable to support arbitrary interaction between
translated code and legacy code in the untyped language
while maintaining the type soundness of the translated
code. In this paper we formalize this goal in the form
of the open-world soundness criterion. We discuss why
it is challenging to achieve open-world soundness using
the traditional proxy-based approach for higher-order
casts. However, the transient design satisfies
open-world soundness. Indeed, we present a formal
semantics for the transient design and prove that our
semantics satisfies open-world soundness. In this paper
we also solve a challenging problem for the transient
design: how to provide blame tracking without proxies.
We define a semantics for blame and prove the Blame
Theorem. We also prove that the Gradual Guarantee holds
for this system, ensuring that programs can be evolved
freely between static and dynamic typing. Finally, we
demonstrate that the runtime overhead of the transient
approach is low in the context of Reticulated Python,
an implementation of gradual typing for Python.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
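
An illustrative Python sketch of the transient design discussed above
(a loose reconstruction, not Reticulated Python's implementation; the
helper names are hypothetical): instead of wrapping values in proxies,
translated code inserts cheap shallow checks at each use site, each
carrying a blame label.

def check(value, tag, blame_label):
    """Shallow, first-order check; raises with a blame label on failure."""
    tags = {"int": int, "str": str, "fun": callable}
    ok = tags[tag](value) if tag == "fun" else isinstance(value, tags[tag])
    if not ok:
        raise TypeError(f"transient check failed at {blame_label}: "
                        f"expected {tag}, got {type(value).__name__}")
    return value

# Typed code declaring f : int -> int compiles to checks like these:
def apply_typed(f, x):
    check(f, "fun", "call site 1")            # f must at least be callable
    result = f(check(x, "int", "argument of f"))
    return check(result, "int", "result of f")

assert apply_typed(lambda n: n + 1, 41) == 42

Because the checks are shallow and attached to use sites rather than to
values, untyped legacy code can pass anything across the boundary without
being wrapped, which is what makes the open-world setting workable.
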
@Article{Lehmann:2017:GRT,
author = "Nico Lehmann and {\'E}ric Tanter",
title = "Gradual refinement types",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "775--788",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009856",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Refinement types are an effective language-based
verification technique. However, as with any expressive
typing discipline, its strength is also its weakness,
sometimes imposing undesired rigidity. Guided by
abstract interpretation, we extend the gradual typing
agenda and develop the notion of gradual refinement
types, allowing smooth evolution and interoperability
between simple types and logically-refined types. In
doing so, we address two challenges unexplored in the
gradual typing literature: dealing with imprecise
logical information, and with dependent function types.
The first challenge leads to a crucial notion of
locality for refinement formulas, and the second yields
novel operators related to type- and term-level
substitution, identifying new opportunities for runtime
errors in gradual dependently-typed languages. The
gradual language we present is type safe, type sound,
and satisfies the refined criteria for gradually-typed
languages of Siek et al. We also explain how to extend
our approach to richer refinement logics, anticipating
key challenges to consider.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Cimini:2017:AGD,
author = "Matteo Cimini and Jeremy G. Siek",
title = "Automatically generating the dynamic semantics of
gradually typed languages",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "789--803",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009863",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many language designers have adopted gradual typing.
However, there remain open questions regarding how to
gradualize languages. Cimini and Siek (2016) created a
methodology and algorithm to automatically generate the
type system of a gradually typed language from a fully
static version of the language. In this paper, we
address the next challenge of how to automatically
generate the dynamic semantics of gradually typed
languages. Such languages typically use an intermediate
language with explicit casts. Our first result is a
methodology for generating the syntax, type system, and
dynamic semantics of the intermediate language with
casts. Next, we present an algorithm that formalizes
and automates the methodology, given a language
definition as input. We show that our approach is
general enough to automatically gradualize several
languages, including features such as polymorphism,
recursive types and exceptions. We prove that our
algorithm produces languages that satisfy the key
correctness criteria of gradual typing. Finally, we
implement the algorithm, generating complete
specifications of gradually typed languages in
lambda-Prolog, including executable interpreters.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Jafery:2017:SUR,
author = "Khurram A. Jafery and Joshua Dunfield",
title = "Sums of uncertainty: refinements go gradual",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "804--817",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009865",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A long-standing shortcoming of statically typed
functional languages is that type checking does not
rule out pattern-matching failures (run-time match
exceptions). Refinement types distinguish different
values of datatypes; if a program annotated with
refinements passes type checking, pattern-matching
failures become impossible. Unfortunately, refinement
is a monolithic property of a type, exacerbating the
difficulty of adding refinement types to nontrivial
programs. Gradual typing has explored how to
incrementally move between static typing and dynamic
typing. We develop a type system of gradual sums that
combines refinement with imprecision. Then, we develop
a bidirectional version of the type system, which rules
out excessive imprecision, and give a type-directed
translation to a target language with explicit casts.
We prove that the static sublanguage cannot have match
failures, that a well-typed program remains well-typed
if its type annotations are made less precise, and that
making annotations less precise causes target programs
to fail later. Several of these results correspond to
criteria for gradual typing given by Siek et al.
(2015).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Ying:2017:IQP,
author = "Mingsheng Ying and Shenggang Ying and Xiaodi Wu",
title = "Invariants of quantum programs: characterisations and
generation",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "818--832",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009840",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Program invariant is a fundamental notion widely used
in program verification and analysis. The aim of this
paper is twofold: (i) find an appropriate definition of
invariants for quantum programs; and (ii) develop an
effective technique of invariant generation for
verification and analysis of quantum programs.
Interestingly, the notion of invariant can be defined
for quantum programs in two different ways --- additive
invariants and multiplicative invariants ---
corresponding to two interpretations of implication in
a continuous-valued logic: the {\L}ukasiewicz implication
and the G{\"o}del implication. It is shown that both of
them can be used to establish partial correctness of
quantum programs. The problem of generating additive
invariants of quantum programs is addressed by reducing
it to an SDP (Semidefinite Programming) problem. This
approach is applied with an SDP solver to generate
invariants of two important quantum algorithms ---
quantum walk and quantum Metropolis sampling. Our
examples show that the generated invariants can be used
to verify correctness of these algorithms and are
helpful in optimising quantum Metropolis sampling. To
our knowledge, this paper is the first attempt to
define the notion of invariant and to develop a method
of invariant generation for quantum programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{DalLago:2017:GPC,
author = "Ugo {Dal Lago} and Claudia Faggian and Beno{\^\i}t
Valiron and Akira Yoshimizu",
title = "The geometry of parallelism: classical, probabilistic,
and quantum effects",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "833--845",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009859",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a Geometry of Interaction model for
higher-order quantum computation, and prove its
adequacy for a fully fledged quantum programming
language in which entanglement, duplication, and
recursion are all available. This model is an instance
of a new framework which captures not only quantum but
also classical and probabilistic computation. Its main
feature is the ability to model commutative effects in
a parallel setting. Our model comes with a multi-token
machine, a proof net system, and a {$\lambda$}-style language.
Being based on a multi-token machine equipped with a
memory, it has a concrete nature which makes it well
suited for building low-level operational descriptions
of higher-order languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Paykin:2017:QCL,
author = "Jennifer Paykin and Robert Rand and Steve Zdancewic",
title = "{QWIRE}: a core language for quantum circuits",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "846--858",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009894",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces QWIRE (``choir''), a language
for defining quantum circuits and an interface for
manipulating them inside of an arbitrary classical host
language. QWIRE is minimal---it contains only a few
primitives---and sound with respect to the physical
properties entailed by quantum mechanics. At the same
time, QWIRE is expressive and highly modular due to its
relationship with the host language, mirroring the QRAM
model of computation that places a quantum computer
(controlled by circuits) alongside a classical computer
(controlled by the host language). We present QWIRE
along with its type system and operational semantics,
which we prove is safe and strongly normalizing
whenever the host language is. We give circuits a
denotational semantics in terms of density matrices.
Throughout, we investigate examples that demonstrate
the expressive power of QWIRE, including extensions to
the host language that (1) expose a general analysis
framework for circuits, and (2) provide dependent
types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
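
An illustrative sketch, in Python rather than in QWIRE, of the split the
abstract describes: a circuit language embedded in a classical host, with
linear wires that must be consumed exactly once. Here linearity is checked
dynamically for illustration, whereas QWIRE's type system enforces it
statically; all names are hypothetical.

class Wire:
    def __init__(self, name):
        self.name, self.consumed = name, False

class Circuit:
    def __init__(self):
        self.gates, self.fresh = [], 0

    def _use(self, w):
        if w.consumed:
            raise ValueError(f"wire {w.name} used twice (linearity violation)")
        w.consumed = True

    def gate(self, name, *inputs):
        for w in inputs:
            self._use(w)
        self.fresh += 1
        out = Wire(f"w{self.fresh}")
        self.gates.append((name, [w.name for w in inputs], out.name))
        return out

c = Circuit()
q = c.gate("init0")          # allocate a qubit wire
q = c.gate("H", q)           # Hadamard consumes q, returns a new wire
b = c.gate("meas", q)        # measurement consumes the wire
print(c.gates)               # [('init0', [], 'w1'), ('H', ['w1'], 'w2'), ...]
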
@Article{Amin:2017:LVA,
author = "Nada Amin and Tiark Rompf",
title = "{LMS-Verify}: abstraction without regret for verified
systems programming",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "859--873",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009867",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Performance critical software is almost always
developed in C, as programmers do not trust high-level
languages to deliver the same reliable performance.
This is bad because low-level code in unsafe languages
attracts security vulnerabilities and because
development is far less productive, with PL advances
mostly lost on programmers operating under tight
performance constraints. High-level languages provide
memory safety out of the box, but they are deemed too
slow and unpredictable for serious system software.
Recent years have seen a surge in staging and
generative programming: the key idea is to use
high-level languages and their abstraction power as
glorified macro systems to compose code fragments in
first-order, potentially domain-specific, intermediate
languages, from which fast C can be emitted. But what
about security? Since the end result is still C code,
the safety guarantees of the high-level host language
are lost. In this paper, we extend this generative
approach to emit ACSL specifications along with C code.
We demonstrate that staging achieves ``abstraction
without regret'' for verification: we show how
high-level programming models, in particular
higher-order composable contracts from dynamic
languages, can be used at generation time to compose
and generate first-order specifications that can be
statically checked by existing tools. We also show how
type classes can automatically attach invariants to
data types, reducing the need for repetitive manual
annotations. We evaluate our system on several case
studies that varyingly exercise verification of memory
safety, overflow safety, and functional correctness. We
feature an HTTP parser that is (1) fast, (2) high-level:
implemented using staged parser combinators, and (3)
secure: with verified memory safety. This result is
significant, as input parsing is a key attack vector,
and vulnerabilities related to HTTP parsing have been
documented in all widely-used web servers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Assaf:2017:HSA,
author = "Mounir Assaf and David A. Naumann and Julien Signoles
and {\'E}ric Totel and Fr{\'e}d{\'e}ric Tronel",
title = "Hypercollecting semantics and its application to
static analysis of information flow",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "874--887",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009889",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We show how static analysis for secure information
flow can be expressed and proved correct entirely
within the framework of abstract interpretation. The
key idea is to define a Galois connection that directly
approximates the hyperproperty of interest. To enable
use of such Galois connections, we introduce a fixpoint
characterisation of hypercollecting semantics, i.e. a
``set of sets'' transformer. This makes it possible to
systematically derive static analyses for
hyperproperties entirely within the calculational
framework of abstract interpretation. We evaluate this
technique by deriving example static analyses. For
qualitative information flow, we derive a dependence
analysis similar to the logic of Amtoft and Banerjee
(SAS'04) and the type system of Hunt and Sands
(POPL'06). For quantitative information flow, we derive
a novel cardinality analysis that bounds the leakage
conveyed by a program instead of simply deciding
whether it exists. This encompasses problems that are
hypersafety but not $k$-safety. We put the framework to
use and introduce variations that achieve precision
rivalling the most recent and precise static analyses
for information flow.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
@Article{Zhang:2017:LTA,
author = "Danfeng Zhang and Daniel Kifer",
title = "{LightDP}: towards automating differential privacy
proofs",
journal = j-SIGPLAN,
volume = "52",
number = "1",
pages = "888--901",
month = jan,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3093333.3009884",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The growing popularity and adoption of differential
privacy in academic and industrial settings has
resulted in the development of increasingly
sophisticated algorithms for releasing information
while preserving privacy. Accompanying this phenomenon
is the natural rise in the development and publication
of incorrect algorithms, thus demonstrating the
necessity of formal verification tools. However,
existing formal methods for differential privacy face a
dilemma: methods based on customized logics can verify
sophisticated algorithms but come with a steep learning
curve and significant annotation burden on the
programmers, while existing programming platforms lack
expressive power for some sophisticated algorithms. In
this paper, we present LightDP, a simple imperative
language that strikes a better balance between
expressive power and usability. The core of LightDP is
a novel relational type system that separates
relational reasoning from privacy budget calculations.
With dependent types, the type system is powerful
enough to verify sophisticated algorithms where the
composition theorem falls short. In addition, the
inference engine of LightDP infers most of the proof
details, and even searches for the proof with minimal
privacy cost when multiple proofs exist. We show that
LightDP verifies sophisticated algorithms with little
manual effort.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "POPL '17 conference proceedings.",
}
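
A minimal Python sketch of the privacy-budget bookkeeping that LightDP's
type system performs statically (a dynamic stand-in for illustration, not
LightDP's language, type system, or inference engine): each call to the
Laplace mechanism spends part of the budget epsilon.

import math, random

class Budget:
    def __init__(self, epsilon):
        self.remaining = epsilon

    def laplace(self, true_value, sensitivity, eps):
        if eps > self.remaining:
            raise RuntimeError("privacy budget exhausted")
        self.remaining -= eps
        scale = sensitivity / eps                 # Laplace scale b
        u = random.random() - 0.5                 # uniform on [-0.5, 0.5)
        noise = -scale * math.copysign(1.0, u) * \
            math.log(max(1 - 2 * abs(u), 1e-300))  # inverse-CDF sampling
        return true_value + noise

b = Budget(epsilon=1.0)
noisy = b.laplace(true_value=100, sensitivity=1, eps=0.5)
print(noisy, "remaining budget:", b.remaining)
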
@Article{Tallada:2016:CGP,
author = "Marc Gonzalez Tallada",
title = "Coarse grain parallelization of deep neural networks",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "1:1--1:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851158",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Deep neural networks (DNN) have recently achieved
extraordinary results in domains like computer vision
and speech recognition. An essential element for this
success has been the introduction of high performance
computing (HPC) techniques in the critical step of
training the neural network. This paper describes the
implementation and analysis of a network-agnostic and
convergence-invariant coarse-grain parallelization of
the DNN training algorithm. The coarse-grain
parallelization is achieved through the exploitation of
batch-level parallelism. This strategy is
independent of the support of specialized and
optimized libraries. Therefore, the optimization is
immediately available for accelerating the DNN
training. The proposal is compatible with multi-GPU
execution without altering the algorithm convergence
rate. The parallelization has been implemented in
Caffe, a state-of-the-art DNN framework. The paper
describes the code transformations for the
parallelization and we also identify the limiting
performance factors of the approach. We show
competitive performance results for two
state-of-the-art computer vision datasets, MNIST and
CIFAR-10. In particular, on a 16-core Xeon E5-2667v2 at
3.30GHz we observe speedups of 8$ \times $ over the
sequential execution, at performance levels similar to
those obtained by the GPU-optimized Caffe version on an
NVIDIA K40 GPU.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
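
An illustrative Python sketch of batch-level (data) parallelism as
described above (a toy one-parameter model, not Caffe's code): each worker
computes gradients on a slice of the mini-batch, the gradients are
averaged, and a single synchronous update is applied, so the parameter
trajectory matches sequential mini-batch SGD (convergence-invariant).

def grad(w, x, y):
    # gradient of squared error for a one-parameter "network" y ~ w*x
    return 2 * (w * x - y) * x

def parallel_step(w, batch, n_workers, lr=0.01):
    slices = [batch[i::n_workers] for i in range(n_workers)]
    # in a real system, each slice is processed by a thread or a GPU
    partial = [sum(grad(w, x, y) for x, y in s) for s in slices if s]
    g = sum(partial) / len(batch)          # average over the whole batch
    return w - lr * g

batch = [(1.0, 2.0), (2.0, 4.0), (3.0, 6.0), (4.0, 8.0)]
w = 0.0
for _ in range(200):
    w = parallel_step(w, batch, n_workers=2)
print(round(w, 3))   # converges toward 2.0 regardless of n_workers
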
@Article{Wang:2016:HPM,
author = "Xiao Wang and Amit Sabne and Sherman Kisner and Anand
Raghunathan and Charles Bouman and Samuel Midkiff",
title = "High performance model based image reconstruction",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "2:1--2:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851163",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computed Tomography (CT) Image Reconstruction is an
important technique used in a wide range of
applications, ranging from explosive detection, medical
imaging to scientific imaging. Among available
reconstruction methods, Model Based Iterative
Reconstruction (MBIR) produces higher quality images
and allows for the use of more general CT scanner
geometries than is possible with more commonly used
methods. The high computational cost of MBIR, however,
often makes it impractical in applications for which it
would otherwise be ideal. This paper describes a new
MBIR implementation that significantly reduces the
computational cost of MBIR while retaining its
benefits. It describes a novel organization of the
scanner data into super-voxels (SV) that, combined with
a super-voxel buffer (SVB), dramatically increase
locality and prefetching, enable parallelism across SVs
and lead to an average speedup of 187 on 20 cores.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Agrawal:2016:EAE,
author = "Sandeep R. Agrawal and Christopher M. Dee and Alvin R.
Lebeck",
title = "Exploiting accelerators for efficient high dimensional
similarity search",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "3:1--3:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851144",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Similarity search finds the most similar matches in an
object collection for a given query, making it an
important problem across a wide range of disciplines
such as web search, image recognition and protein
sequencing. Practical implementations of High
Dimensional Similarity Search (HDSS) search across
billions of possible solutions for multiple queries in
real time, making its performance and efficiency a
significant challenge. Existing clusters and
datacenters use commercial multicore hardware to
perform search, which may not provide the optimal
performance and performance per Watt. This work
explores the performance, power and cost benefits of
using throughput accelerators like GPUs to perform
similarity search for query cohorts even under tight
deadlines. We propose optimized implementations of
similarity search for both the host and the
accelerator. Augmenting existing Xeon servers with
accelerators results in a 3$ \times $ improvement in
throughput per machine, resulting in a more than 2.5$
\times $ reduction in cost of ownership, even for
discounted Xeon servers. Replacing a Xeon based cluster
with an accelerator based cluster for similarity search
reduces the total cost of ownership by more than 6$
\times $ to 16$ \times $ while consuming significantly
less power than an ARM based cluster.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
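
An illustrative Python sketch of the core kernel of high-dimensional
similarity search (brute-force top-k by cosine similarity; the batching of
query cohorts and the GPU kernels are omitted): this embarrassingly
data-parallel scoring loop is what throughput accelerators speed up.

import heapq, math

def cosine(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    return dot / (na * nb) if na and nb else 0.0

def top_k(query, collection, k):
    # heapq.nlargest keeps a k-element heap: O(N log k) over N objects
    return heapq.nlargest(k, collection, key=lambda obj: cosine(query, obj))

docs = [[1, 0, 0], [0.9, 0.1, 0], [0, 1, 0], [0, 0, 1]]
print(top_k([1, 0, 0], docs, k=2))   # the two vectors nearest the query
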
@Article{Cruz:2016:DCG,
author = "Flavio Cruz and Ricardo Rocha and Seth Copen
Goldstein",
title = "Declarative coordination of graph-based parallel
programs",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "4:1--4:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851153",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Declarative programming has been hailed as a promising
approach to parallel programming since it makes it
easier to reason about programs while hiding the
implementation details of parallelism from the
programmer. However, its advantage is also its
disadvantage as it leaves the programmer with no
straightforward way to optimize programs for
performance. In this paper, we introduce Coordinated
Linear Meld (CLM), a concurrent forward-chaining linear
logic programming language, with a declarative way to
coordinate the execution of parallel programs allowing
the programmer to specify arbitrary scheduling and data
partitioning policies. Our approach allows the
programmer to write graph-based declarative programs
and then optionally to use coordination to fine-tune
parallel performance. In this paper we specify the set
of coordination facts, discuss their implementation in
a parallel virtual machine, and show---through
example---how they can be used to optimize parallel
execution. We compare the performance of CLM programs
against the original uncoordinated Linear Meld and
several other frameworks.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Denniston:2016:DH,
author = "Tyler Denniston and Shoaib Kamil and Saman
Amarasinghe",
title = "Distributed {Halide}",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "5:1--5:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851157",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many image processing tasks are naturally expressed as
a pipeline of small computational kernels known as
stencils. Halide is a popular domain-specific language
and compiler designed to implement image processing
algorithms. Halide uses simple language constructs to
express what to compute and a separate scheduling
co-language for expressing when and where to perform
the computation. This approach has demonstrated
performance comparable to or better than hand-optimized
code. Until now, however, Halide has been restricted to
parallel shared memory execution, limiting its
performance for memory-bandwidth-bound pipelines or
large-scale image processing tasks. We present an
extension to Halide to support distributed-memory
parallel execution of complex stencil pipelines. These
extensions compose with the existing scheduling
constructs in Halide, allowing expression of complex
computation and communication strategies. Existing
Halide applications can be distributed with minimal
changes, allowing programmers to explore the tradeoff
between recomputation and communication with little
effort. Approximately 10 new lines of code are needed
even for a 200-line, 99-stage application. On nine
image processing benchmarks, our extensions give up to
a 1.4$ \times $ speedup on a single node over regular
multithreaded execution with the same number of cores,
by mitigating the effects of non-uniform memory access.
The distributed benchmarks achieve up to 18$ \times $
speedup on a 16 node testing machine and up to 57$
\times $ speedup on 64 nodes of the NERSC Cori
supercomputer.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Newton:2016:PTC,
author = "Ryan R. Newton and {\"O}mer S. Agacan and Peter Fogg
and Sam Tobin-Hochstadt",
title = "Parallel type-checking with {Haskell} using saturating
{LVars} and stream generators",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "6:1--6:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851142",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Given the sophistication of recent type systems,
unification-based type-checking and inference can be a
time-consuming phase of compilation---especially when
union types are combined with subtyping. It is natural
to consider improving performance through parallelism,
but these algorithms are challenging to parallelize due
to complicated control structure and difficulties
representing data in a way that is both efficient and
supports concurrency. We provide techniques that
address these problems based on the LVish approach to
deterministic-by-default parallel programming. We
extend LVish with Saturating LVars, the first LVars
implemented to release memory during the object's
lifetime. Our design allows us to achieve a parallel
speedup on worst-case (exponential) inputs of
Hindley-Milner inference, and on the Typed Racket
type-checking algorithm, which yields up to an 8.46$
\times $ parallel speedup on 14 cores for type-checking
examples drawn from the Racket repository.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
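
An illustrative Python sketch of a "saturating" monotonic variable in the
spirit of the Saturating LVars described above (an assumption-laden toy:
a set LVar that collapses to a top state past a bound; thread safety and
LVish threshold reads are omitted): saturation is what lets memory be
released during the object's lifetime.

class SaturatingSetLVar:
    def __init__(self, bound):
        self.bound, self.elems, self.saturated = bound, set(), False

    def insert(self, x):
        if self.saturated:
            return                       # already Top: inserts are no-ops
        self.elems.add(x)
        if len(self.elems) > self.bound:
            self.elems = None            # release the memory
            self.saturated = True

    def contains(self, x):
        # saturated means "could contain anything": conservatively True
        return True if self.saturated else x in self.elems

lv = SaturatingSetLVar(bound=3)
for v in [1, 2, 3, 4]:
    lv.insert(v)
print(lv.saturated, lv.contains(99))     # True True
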
@Article{Wang:2016:APG,
author = "Lei Wang and Fan Yang and Liangji Zhuang and Huimin
Cui and Fang Lv and Xiaobing Feng",
title = "Articulation points guided redundancy elimination for
betweenness centrality",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "7:1--7:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851154",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Betweenness centrality (BC) is an important metrics in
graph analysis which indicates critical vertices in
large-scale networks based on shortest path
enumeration. Typically, a BC algorithm constructs a
shortest-path DAG for each vertex to calculate its BC
score. However, for emerging real-world graphs, even
the state-of-the-art BC algorithm will introduce a
number of redundancies, as suggested by the existence
of articulation points. Articulation points imply some
common sub-DAGs in the DAGs for different vertices, but
existing algorithms do not leverage such information
and miss the optimization opportunity. We propose a
redundancy elimination approach, which identifies the
common sub-DAGs shared between the DAGs for different
vertices. Our approach leverages the articulation
points and reuses the results of the common sub-DAGs in
calculating the BC scores, which eliminates redundant
computations. We implemented the approach as an
algorithm with two-level parallelism and evaluated it
on a multicore platform. Compared to the
state-of-the-art implementation using shared memory,
our approach achieves an average speedup of 4.6$ \times $ across
a variety of real-world graphs, with the traversal
rates of 45--2400 MTEPS (Millions of Traversed
Edges per Second).",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
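
An illustrative Python sketch of the per-source computation the paper
optimizes (Brandes-style betweenness centrality: one shortest-path DAG per
source, then backward dependency accumulation). The articulation-point
reuse of common sub-DAGs described in the abstract is not shown; this is
the baseline it improves on.

from collections import deque

def betweenness(graph):                  # graph: {v: [neighbors]}
    bc = {v: 0.0 for v in graph}
    for s in graph:
        # forward BFS building the shortest-path DAG from s
        dist = {s: 0}; sigma = {v: 0 for v in graph}; sigma[s] = 1
        preds = {v: [] for v in graph}; order = []
        q = deque([s])
        while q:
            v = q.popleft(); order.append(v)
            for w in graph[v]:
                if w not in dist:
                    dist[w] = dist[v] + 1; q.append(w)
                if dist[w] == dist[v] + 1:
                    sigma[w] += sigma[v]; preds[w].append(v)
        # backward accumulation of dependencies
        delta = {v: 0.0 for v in graph}
        for w in reversed(order):
            for v in preds[w]:
                delta[v] += sigma[v] / sigma[w] * (1 + delta[w])
            if w != s:
                bc[w] += delta[w]
    return bc

g = {0: [1], 1: [0, 2], 2: [1, 3], 3: [2]}   # a path: 1 and 2 are central
print(betweenness(g))
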
@Article{Bloemen:2016:MCF,
author = "Vincent Bloemen and Alfons Laarman and Jaco van de
Pol",
title = "Multi-core on-the-fly {SCC} decomposition",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "8:1--8:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851161",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The main advantages of Tarjan's strongly connected
component (SCC) algorithm are its linear time
complexity and ability to return SCCs on-the-fly, while
traversing or even generating the graph. Until now,
most parallel SCC algorithms sacrifice both: they run
in quadratic worst-case time and/or require the full
graph in advance. The current paper presents a novel
parallel, on-the-fly SCC algorithm. It preserves the
linear-time property by letting workers explore the
graph randomly while carefully communicating partially
completed SCCs. We prove that this strategy is correct.
For efficiently communicating partial SCCs, we develop
a concurrent, iterable disjoint set structure
(combining the union-find data structure with a cyclic
list). We demonstrate scalability on a 64-core machine
using 75 real-world graphs (from model checking and
explicit data graphs), synthetic graphs (combinations
of trees, cycles and linear graphs), and random graphs.
Previous work did not show speedups for graphs
containing a large SCC. We observe that our parallel
algorithm is typically 10--30$ \times $ faster compared
to Tarjan's algorithm for graphs containing a large
SCC. Comparable performance (with respect to the
current state-of-the-art) is obtained for graphs
containing many small SCCs.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
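
An illustrative Python sketch of the disjoint-set (union-find) structure
at the heart of the paper's approach: workers merge partially completed
SCCs by uniting their vertices. Shown sequentially with path compression;
the lock-free cyclic list that makes the sets iterable concurrently is
omitted.

class DisjointSets:
    def __init__(self, n):
        self.parent = list(range(n))

    def find(self, v):
        root = v
        while self.parent[root] != root:
            root = self.parent[root]
        while self.parent[v] != root:          # path compression
            self.parent[v], v = root, self.parent[v]
        return root

    def union(self, a, b):
        ra, rb = self.find(a), self.find(b)
        if ra != rb:
            self.parent[ra] = rb

ds = DisjointSets(5)
ds.union(0, 1); ds.union(1, 2)     # vertices 0, 1, 2 found to share an SCC
assert ds.find(0) == ds.find(2)
assert ds.find(3) != ds.find(0)
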
@Article{Kannan:2016:HPP,
author = "Ramakrishnan Kannan and Grey Ballard and Haesun Park",
title = "A high-performance parallel algorithm for nonnegative
matrix factorization",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "9:1--9:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851152",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Non-negative matrix factorization (NMF) is the problem
of determining two non-negative low-rank factors $W$ and
$H$, for the given input matrix $A$, such that $A \approx
W H$. NMF is a useful tool for many applications in
different domains such as topic modeling in text
mining, background separation in video analysis, and
community detection in social networks. Despite its
popularity in the data mining community, there is a
lack of efficient distributed algorithms to solve the
problem for big data sets. We propose a
high-performance distributed-memory parallel algorithm
that computes the factorization by iteratively solving
alternating non-negative least squares (NLS)
subproblems for $W$ and $H$. It maintains the data and
factor matrices in memory (distributed across
processors), uses MPI for interprocessor communication,
and, in the dense case, provably minimizes
communication costs (under mild assumptions). As
opposed to previous implementations, our algorithm is
also flexible: (1) it performs well for both dense and
sparse matrices, and (2) it allows the user to choose
any one of the multiple algorithms for solving the
updates to the low-rank factors $W$ and $H$ within the
alternating iterations. We demonstrate the scalability
of our algorithm and compare it with baseline
implementations, showing significant performance
improvements.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
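
An illustrative Python sketch of the alternating scheme the paper
parallelizes: fix H and update W, then fix W and update H, until A ~ WH.
Here the NLS subproblems are approximated by Lee--Seung multiplicative
updates (one of the interchangeable local solvers the abstract alludes
to); the MPI data distribution is omitted.

import random

def nmf(A, k, iters=200, eps=1e-9):
    m, n = len(A), len(A[0])
    W = [[random.random() for _ in range(k)] for _ in range(m)]
    H = [[random.random() for _ in range(n)] for _ in range(k)]
    mul = lambda X, Y: [[sum(X[i][t] * Y[t][j] for t in range(len(Y)))
                         for j in range(len(Y[0]))] for i in range(len(X))]
    T = lambda X: [list(r) for r in zip(*X)]
    for _ in range(iters):
        WH = mul(W, H)
        num, den = mul(T(W), A), mul(T(W), WH)      # H <- H * W'A / W'WH
        H = [[H[i][j] * num[i][j] / (den[i][j] + eps) for j in range(n)]
             for i in range(k)]
        WH = mul(W, H)
        num, den = mul(A, T(H)), mul(WH, T(H))      # W <- W * AH' / WHH'
        W = [[W[i][j] * num[i][j] / (den[i][j] + eps) for j in range(k)]
             for i in range(m)]
    return W, H

A = [[1, 0], [0, 1]]
W, H = nmf(A, k=2)
approx = [[sum(W[i][t] * H[t][j] for t in range(2))
           for j in range(2)] for i in range(2)]
print([[round(x, 2) for x in row] for row in approx])   # close to A
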
@Article{Chowdhury:2016:AAD,
author = "Rezaul Chowdhury and Pramod Ganapathi and Jesmin Jahan
Tithi and Charles Bachmeier and Bradley C. Kuszmaul and
Charles E. Leiserson and Armando Solar-Lezama and Yuan
Tang",
title = "{AUTOGEN}: automatic discovery of cache-oblivious
parallel recursive algorithms for solving dynamic
programs",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "10:1--10:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851167",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present AUTOGEN---an algorithm that for a wide
class of dynamic programming (DP) problems
automatically discovers highly efficient
cache-oblivious parallel recursive divide-and-conquer
algorithms from inefficient iterative descriptions of
DP recurrences. AUTOGEN analyzes the set of DP table
locations accessed by the iterative algorithm when run
on a DP table of small size, and automatically
identifies a recursive access pattern and a
corresponding provably correct recursive algorithm for
solving the DP recurrence. We use AUTOGEN to
autodiscover efficient algorithms for several
well-known problems. Our experimental results show that
several autodiscovered algorithms significantly
outperform parallel looping and tiled loop-based
algorithms. Also these algorithms are less sensitive to
fluctuations of memory and bandwidth compared with
their looping counterparts, and their running times and
energy profiles remain relatively more stable. To the
best of our knowledge, AUTOGEN is the first algorithm
that can automatically discover new nontrivial
divide-and-conquer algorithms.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
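
An illustrative Python sketch of the iterative-to-recursive transformation
AUTOGEN automates, on the edit-distance DP (a toy reconstruction by hand,
not AUTOGEN's output): the recursive version fills the same cells, but in
a divide-and-conquer order over quadrants, which is what makes the
discovered algorithms cache-oblivious.

def cell(D, a, b, i, j):
    if i == 0: D[i][j] = j; return
    if j == 0: D[i][j] = i; return
    D[i][j] = min(D[i-1][j] + 1, D[i][j-1] + 1,
                  D[i-1][j-1] + (a[i-1] != b[j-1]))

def fill(D, a, b, i0, j0, h, w):
    if h == 1 and w == 1:
        cell(D, a, b, i0, j0); return
    hh, hw = max(h // 2, 1), max(w // 2, 1)
    # quadrant order respects the (i-1,j), (i,j-1), (i-1,j-1) dependencies
    fill(D, a, b, i0, j0, hh, hw)                              # top-left
    if w > 1: fill(D, a, b, i0, j0 + hw, hh, w - hw)           # top-right
    if h > 1: fill(D, a, b, i0 + hh, j0, h - hh, hw)           # bottom-left
    if h > 1 and w > 1:
        fill(D, a, b, i0 + hh, j0 + hw, h - hh, w - hw)        # bottom-right

a, b = "kitten", "sitting"
D = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
fill(D, a, b, 0, 0, len(a) + 1, len(b) + 1)
print(D[len(a)][len(b)])    # 3, matching the iterative edit distance
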
@Article{Wang:2016:GHP,
author = "Yangzihao Wang and Andrew Davidson and Yuechao Pan and
Yuduo Wu and Andy Riffel and John D. Owens",
title = "{Gunrock}: a high-performance graph processing library
on the {GPU}",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "11:1--11:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851145",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For large-scale graph analytics on the GPU, the
irregularity of data access/control flow and the
complexity of programming GPUs have been two
significant challenges for developing a programmable
high-performance graph library. ``Gunrock,'' our
high-level bulk-synchronous graph-processing system
targeting the GPU, takes a new approach to abstracting
GPU graph analytics: rather than designing an
abstraction around computation, Gunrock instead
implements a novel data-centric abstraction centered on
operations on a vertex or edge frontier. Gunrock
achieves a balance between performance and
expressiveness by coupling high-performance GPU
computing primitives and optimization strategies with a
high-level programming model that allows programmers to
quickly develop new graph primitives with small code
size and minimal GPU programming knowledge. We evaluate
Gunrock on five graph primitives (BFS, BC, SSSP, CC,
and PageRank) and show that Gunrock has on average at
least an order of magnitude speedup over Boost and
PowerGraph, comparable performance to the fastest GPU
hardwired primitives, and better performance than any
other GPU high-level graph library.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
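
An illustrative Python sketch of Gunrock's data-centric abstraction (a
sequential toy, not Gunrock's CUDA code): computation is phrased as
operations on a frontier of vertices, and BFS is just repeated advance
(expand along edges) plus filter (drop already-visited vertices), each of
which Gunrock maps onto data-parallel GPU kernels.

def advance(graph, frontier):
    return [w for v in frontier for w in graph[v]]

def filter_visited(candidates, visited):
    out = []
    for w in candidates:
        if w not in visited:
            visited.add(w)
            out.append(w)
    return out

def bfs(graph, source):
    visited = {source}
    frontier, depth, level = [source], {source: 0}, 0
    while frontier:
        level += 1
        frontier = filter_visited(advance(graph, frontier), visited)
        for w in frontier:
            depth[w] = level
    return depth

g = {0: [1, 2], 1: [3], 2: [3], 3: []}
print(bfs(g, 0))    # {0: 0, 1: 1, 2: 1, 3: 2}
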
@Article{Ashkiani:2016:GM,
author = "Saman Ashkiani and Andrew Davidson and Ulrich Meyer
and John D. Owens",
title = "{GPU} multisplit",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "12:1--12:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851169",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multisplit is a broadly useful parallel primitive that
permutes its input data into contiguous buckets or
bins, where the function that categorizes an element
into a bucket is provided by the programmer. Due to the
lack of an efficient multisplit on GPUs, programmers
often choose to implement multisplit with a sort.
However, sort does more work than necessary to
implement multisplit, and is thus inefficient. In this
work, we provide a parallel model and multiple
implementations for the multisplit problem. Our
principal focus is multisplit for a small number of
buckets. In our implementations, we exploit the
computational hierarchy of the GPU to perform most of
the work locally, with minimal usage of global
operations. We also use warp-synchronous programming
models to avoid branch divergence and reduce memory
usage, as well as hierarchical reordering of input
elements to achieve better coalescing of global memory
accesses. On an NVIDIA K40c GPU, for key-only
(key-value) multisplit, we demonstrate a 3.0--6.7$ \times $
(4.4--8.0$ \times $) speedup over radix sort, and achieve a peak
throughput of 10.0 G keys/s.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
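
An illustrative Python sketch of the multisplit primitive itself (the
GPU warp-level tricks are omitted): a counting pass over bucket ids,
an exclusive prefix sum, and a stable scatter. Unlike a full sort,
elements are only grouped by bucket, which is why a direct multisplit
can do less work than radix sort.

from itertools import accumulate

def multisplit(keys, bucket_of, n_buckets):
    counts = [0] * n_buckets
    for k in keys:                       # histogram of bucket sizes
        counts[bucket_of(k)] += 1
    offsets = [0] + list(accumulate(counts))[:-1]   # exclusive prefix sum
    cursor, out = list(offsets), [None] * len(keys)
    for k in keys:                       # stable scatter into buckets
        b = bucket_of(k)
        out[cursor[b]] = k
        cursor[b] += 1
    return out, offsets

keys = [17, 3, 42, 8, 25, 11]
out, offsets = multisplit(keys, bucket_of=lambda k: k % 3, n_buckets=3)
print(out, offsets)    # grouped by k mod 3; offsets mark bucket starts
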
@Article{Matteis:2016:KCR,
author = "Tiziano {De Matteis} and Gabriele Mencagli",
title = "Keep calm and react with foresight: strategies for
low-latency and energy-efficient elastic data stream
processing",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "13:1--13:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851148",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper addresses the problem of designing scaling
strategies for elastic data stream processing.
Elasticity allows applications to rapidly change their
configuration on-the-fly (e.g., the amount of used
resources) in response to dynamic workload
fluctuations. In this work we address this problem by
adopting the Model Predictive Control technique, a
control-theoretic method aimed at finding the optimal
application configuration along a limited prediction
horizon in the future by solving an online optimization
problem. Our control strategies are designed to address
latency constraints, using Queueing Theory models, and
energy consumption by changing the number of used cores
and the CPU frequency through the Dynamic Voltage and
Frequency Scaling (DVFS) support available in the
modern multicore CPUs. The proactive capabilities, in
addition to the latency- and energy-awareness,
represent the novel features of our approach. To
validate our methodology, we develop a thorough set of
experiments on a high-frequency trading application.
The results demonstrate the high-degree of flexibility
and configurability of our approach, and show the
effectiveness of our elastic scaling strategies
compared with existing state-of-the-art techniques used
in similar scenarios.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Li:2016:WSI,
author = "Jing Li and Kunal Agrawal and Sameh Elnikety and
Yuxiong He and I-Ting Angelina Lee and Chenyang Lu and
Kathryn S. McKinley",
title = "Work stealing for interactive services to meet target
latency",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "14:1--14:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851151",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Interactive web services increasingly drive critical
business workloads such as search, advertising, games,
shopping, and finance. Whereas optimizing parallel
programs and distributed server systems has
historically focused on average latency and throughput,
the primary metric for interactive applications is
instead consistent responsiveness, i.e., minimizing the
number of requests that miss a target latency. This
paper is the first to show how to generalize
work-stealing, which is traditionally used to minimize
the makespan of a single parallel job, to optimize for
a target latency in interactive services with multiple
parallel requests. We design a new adaptive work
stealing policy, called tail-control, that reduces the
number of requests that miss a target latency. It uses
instantaneous request progress, system load, and a
target latency to choose when to parallelize requests
with stealing, when to admit new requests, and when to
limit parallelism of large requests. We implement this
approach in the Intel Thread Building Block (TBB)
library and evaluate it on real-world workloads and
synthetic workloads. The tail-control policy
substantially reduces the number of requests exceeding
the desired target latency and delivers up to 58\%
relative improvement over various baseline policies.
This generalization of work stealing for multiple
requests effectively optimizes the number of requests
that complete within a target latency, a key metric for
interactive services.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
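
An illustrative Python sketch of the flavor of the tail-control decision
described above: a request takes extra cores only if it cannot meet the
target latency serially and spare capacity exists. The decision inputs
(progress, load, target) follow the abstract, but the rule and thresholds
here are hypothetical, not the paper's calibrated policy.

def allow_parallelism(work_remaining, elapsed, target_latency,
                      active_requests, n_cores):
    time_left = target_latency - elapsed
    if time_left <= 0:
        return False                 # already missed: do not steal cores
    sequential_finish = elapsed + work_remaining
    if sequential_finish <= target_latency:
        return False                 # on track serially: leave cores free
    load = active_requests / n_cores
    return load < 1.0                # parallelize only if cores are spare

# a large request at 60% of its deadline with much work left
print(allow_parallelism(work_remaining=50, elapsed=60,
                        target_latency=100, active_requests=8, n_cores=14))
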
@Article{Steele:2016:AAC,
author = "Guy L. {Steele, Jr.} and Jean-Baptiste Tristan",
title = "Adding approximate counters",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "15:1--15:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851147",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a general framework for adding the values
of two approximate counters to produce a new
approximate counter value whose expected estimated
value is equal to the sum of the expected estimated
values of the given approximate counters. (To the best
of our knowledge, this is the first published
description of any algorithm for adding two approximate
counters.) We then work out implementation details for
five different kinds of approximate counter and provide
optimized pseudocode. For three of them, we present
proofs that the variance of a counter value produced by
adding two counter values in this way is bounded, and
in fact is no worse, or not much worse, than the
variance of the value of a single counter to which the
same total number of increment operations has been
applied. Addition of approximate counters is useful in
massively parallel divide-and-conquer algorithms that
use a distributed representation for large arrays of
counters. We describe two machine-learning algorithms
for topic modeling that use millions of integer
counters, and confirm that replacing the integer
counters with approximate counters is effective,
speeding up a GPU-based implementation by over 65\% and
a CPU-based one by nearly 50\%, as well as reducing memory
requirements, without degrading their statistical
effectiveness.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
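
For readers who want the substrate: below is a sketch of the classic
binary Morris counter, one kind of approximate counter to which such
addition applies. This shows only the textbook increment/estimate
pair; the paper's actual contribution, the addition algorithms and
their variance bounds, is not reproduced here.

    #include <cmath>
    #include <cstdint>
    #include <random>

    // Classic binary Morris counter: store only an exponent c, increment
    // with probability 2^-c, and estimate the true count as 2^c - 1.
    // The estimate is unbiased (its expectation equals the number of
    // increments), which is the property addition must preserve.
    struct MorrisCounter {
        uint8_t c = 0;

        void increment(std::mt19937_64 &rng) {
            std::uniform_real_distribution<double> u(0.0, 1.0);
            if (u(rng) < std::ldexp(1.0, -static_cast<int>(c))) ++c;
        }
        double estimate() const { return std::ldexp(1.0, c) - 1.0; }
    };

Adding two such counters means producing a counter whose estimate is,
in expectation, the sum of the two estimates while keeping the
variance bounded; that is exactly what the entry above works out for
five counter variants.
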
@Article{Yang:2016:WFQ,
author = "Chaoran Yang and John Mellor-Crummey",
title = "A wait-free queue as fast as fetch-and-add",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "16:1--16:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851168",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent data structures that have fast and
predictable performance are of critical importance for
harnessing the power of multicore processors, which are
now ubiquitous. Although wait-free objects, whose
operations complete in a bounded number of steps, were
devised more than two decades ago, wait-free objects
that can deliver scalable high performance are still
rare. In this paper, we present the first wait-free
FIFO queue based on fetch-and-add (FAA). While
compare-and-swap (CAS) based non-blocking algorithms
may perform poorly due to work wasted by CAS failures,
algorithms that coordinate using FAA, which is
guaranteed to succeed, can in principle perform better
under high contention. Along with FAA, our queue uses a
custom epoch-based scheme to reclaim memory; on x86
architectures, it requires no extra memory fences on
our algorithm's typical execution path. An empirical
study of our new FAA-based wait-free FIFO queue under
high contention on four different architectures with
many hardware threads shows that it outperforms prior
queue designs that lack a wait-free progress guarantee.
Surprisingly, at the highest level of contention, the
throughput of our queue is often as high as that of a
microbenchmark that only performs FAA. As a result, our
fast wait-free queue implementation is useful in
practice on most multi-core systems today. We believe
that our design can serve as an example of how to
construct other fast wait-free objects.",
acknowledgement = ack-nhfb,
articleno = "16",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
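
To make the FAA-versus-CAS contrast concrete, here is a heavily
simplified sketch of the "infinite array" skeleton that FAA-based
queues build on: each operation claims a unique cell with one
fetch-and-add, which always succeeds, and only then races on that
single cell. This toy is bounded and merely obstruction-free; the
cell encodings are assumptions, and the machinery that makes the
paper's queue wait-free and unbounded (cell state handling, slow
paths, epoch-based reclamation) is omitted.

    #include <atomic>
    #include <cstddef>

    constexpr std::size_t N = 1 << 20;     // stand-in for an unbounded array
    constexpr long EMPTY = 0, TAKEN = -1;  // assumed encodings; values are > 0

    std::atomic<long> cells[N];            // zero-initialized, i.e., all EMPTY
    std::atomic<std::size_t> head{0}, tail{0};

    bool enqueue(long v) {                 // precondition: v > 0
        std::size_t t = tail.fetch_add(1); // claim a cell; FAA never fails
        long expected = EMPTY;
        // The only race left is on the claimed cell itself.
        return cells[t].compare_exchange_strong(expected, v);
    }

    long dequeue() {
        std::size_t h = head.fetch_add(1); // claim a cell
        return cells[h].exchange(TAKEN);   // EMPTY: we overtook the enqueuer
    }
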
@Article{Haider:2016:LRA,
author = "Syed Kamran Haider and William Hasenplaugh and Dan
Alistarh",
title = "Lease\slash release: architectural support for scaling
contended data structures",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "17:1--17:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851155",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "High memory contention is generally agreed to be a
worst-case scenario for concurrent data structures.
There has been a significant amount of research effort
spent investigating designs which minimize contention,
and several programming techniques have been proposed
to mitigate its effects. However, there are currently
few architectural mechanisms to allow scaling contended
data structures at high thread counts. In this paper,
we investigate hardware support for scalable contended
data structures. We propose Lease/Release, a simple
addition to standard directory-based MSI cache
coherence protocols, allowing participants to lease
memory, at the granularity of cache lines, by delaying
coherence messages for a short, bounded period of time.
Our analysis shows that Lease/Release can significantly
reduce the overheads of contention for both
non-blocking (lock-free) and lock-based data structure
implementations, while ensuring that no deadlocks are
introduced. We validate Lease/Release empirically on
the Graphite multiprocessor simulator, on a range of
data structures, including queue, stack, and priority
queue implementations, as well as on transactional
applications. Results show that Lease/Release
consistently improves both throughput and energy usage,
by up to 5x, both for lock-free and lock-based data
structure designs.",
acknowledgement = ack-nhfb,
articleno = "17",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Guerraoui:2016:OCO,
author = "Rachid Guerraoui and Vasileios Trigonakis",
title = "Optimistic concurrency with {OPTIK}",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "18:1--18:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851146",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce OPTIK, a new practical design pattern for
designing and implementing fast and scalable concurrent
data structures. OPTIK relies on the commonly used
technique of version numbers for detecting conflicting
concurrent operations. We show how to implement the
OPTIK pattern using the novel concept of OPTIK locks.
These locks enable the use of version numbers for
implementing very efficient optimistic concurrent data
structures. Existing state-of-the-art lock-based data
structures acquire the lock and then check for
conflicts. In contrast, with OPTIK locks, we merge the
lock acquisition with the detection of conflicting
concurrency in a single atomic step, similarly to
lock-free algorithms. We illustrate the power of our
OPTIK pattern and its implementation by introducing
four new algorithms and by optimizing four
state-of-the-art algorithms for linked lists, skip
lists, hash tables, and queues. Our results show that
concurrent data structures built using OPTIK are more
scalable than the state of the art.",
acknowledgement = ack-nhfb,
articleno = "18",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
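
A minimal sketch of what such a versioned try-lock can look like,
assuming the common encoding of a version number with a lock flag in
the low bit (the concrete OPTIK interface is in the paper): a single
compare-and-swap both acquires the lock and validates that the
version observed during the optimistic phase is still current.

    #include <atomic>
    #include <cstdint>

    // Versioned try-lock in the OPTIK spirit: word = (version << 1) | locked.
    // Lock acquisition and conflict detection happen in one atomic step.
    struct OptikLock {
        std::atomic<uint64_t> word{0};

        uint64_t version() const {            // read in the optimistic phase
            return word.load(std::memory_order_acquire) >> 1;
        }
        bool trylock_version(uint64_t seen) { // fails iff the version moved
            uint64_t unlocked = seen << 1;
            return word.compare_exchange_strong(
                unlocked, (seen << 1) | 1, std::memory_order_acquire);
        }
        void unlock() {                       // bump version, clear the flag
            uint64_t v = word.load(std::memory_order_relaxed) >> 1;
            word.store((v + 1) << 1, std::memory_order_release);
        }
    };

Typical use: read version(), traverse or compute optimistically, then
call trylock_version(seen); failure means a conflicting update
committed in the meantime, so the operation restarts, much like a
failed CAS in a lock-free algorithm.
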
@Article{Dice:2016:RTL,
author = "Dave Dice and Alex Kogan and Yossi Lev",
title = "Refined transactional lock elision",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "19:1--19:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851162",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactional lock elision (TLE) is a well-known
technique that exploits hardware transactional memory
(HTM) to introduce concurrency into lock-based
software. It achieves that by attempting to execute a
critical section protected by a lock in an atomic
hardware transaction, reverting to the lock if these
attempts fail. One significant drawback of TLE is that
it disables hardware speculation once there is a thread
running under lock. In this paper we present two
algorithms that rely on existing compiler support for
transactional programs and allow threads to speculate
concurrently on HTM along with a thread holding the
lock. We demonstrate the benefit of our algorithms over
TLE and other related approaches with an in-depth
analysis of a number of benchmarks and a wide range of
workloads, including an AVL tree-based micro-benchmark
and ccTSA, a real sequence assembler application.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
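
For context, a sketch of plain TLE, the baseline being refined: the
critical section runs as a hardware transaction that subscribes to
the lock word, so a thread that actually takes the lock aborts every
speculating thread. That all-or-nothing behavior is precisely the
drawback described above. The sketch uses Intel RTM intrinsics
(compile with -mrtm on TSX-capable hardware); the retry budget is an
assumption.

    #include <atomic>
    #include <immintrin.h>  // _xbegin/_xend/_xabort/_xtest (Intel RTM)

    constexpr int MAX_ATTEMPTS = 3;  // assumed retry budget

    void tle_enter(std::atomic<bool> &lock) {
        for (int i = 0; i < MAX_ATTEMPTS; ++i) {
            if (_xbegin() == _XBEGIN_STARTED) {
                if (!lock.load(std::memory_order_relaxed))
                    return;      // speculate; the lock is now in our read set
                _xabort(0xff);   // lock is held: speculation must stop
            }
            // Aborted: wait until the lock looks free, then retry.
            while (lock.load(std::memory_order_relaxed)) { /* spin */ }
        }
        bool expected = false;   // fallback: take the lock for real
        while (!lock.compare_exchange_weak(expected, true)) expected = false;
    }

    void tle_exit(std::atomic<bool> &lock) {
        if (_xtest()) _xend();                              // commit speculation
        else lock.store(false, std::memory_order_release);  // real unlock
    }
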
@Article{Cao:2016:DBG,
author = "Man Cao and Minjia Zhang and Aritra Sengupta and
Michael D. Bond",
title = "Drinking from both glasses: combining pessimistic and
optimistic tracking of cross-thread dependences",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "20:1--20:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851143",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is notoriously challenging to develop parallel
software systems that are both scalable and correct.
Runtime support for parallelism---such as multithreaded
record {\&} replay, data race detectors, transactional
memory, and enforcement of stronger memory
models---helps achieve these goals, but existing
commodity solutions slow programs substantially in
order to track (i.e., detect or control) an execution's
cross-thread dependences accurately. Prior work tracks
cross-thread dependences either ``pessimistically,''
slowing every program access, or ``optimistically,''
allowing for lightweight instrumentation of most
accesses but dramatically slowing accesses involved in
cross-thread dependences. This paper seeks to hybridize
pessimistic and optimistic tracking, which is
challenging because there exists a fundamental mismatch
between pessimistic and optimistic tracking. We address
this challenge based on insights about how dependence
tracking and program synchronization interact, and
introduce a novel approach called hybrid tracking.
Hybrid tracking is suitable for building efficient
runtime support, which we demonstrate by building
hybrid-tracking-based versions of a dependence recorder
and a region serializability enforcer. An adaptive,
profile-based policy makes runtime decisions about
switching between pessimistic and optimistic tracking.
Our evaluation shows that hybrid tracking enables
runtime support to overcome the performance limitations
of both pessimistic and optimistic tracking alone.",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Wang:2016:MGM,
author = "Tianzheng Wang and Milind Chabbi and Hideaki Kimura",
title = "Be my guest: {MCS} lock now welcomes guests",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "21:1--21:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851160",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The MCS lock is one of the most prevalent queuing
locks. It provides fair scheduling and high performance
on massively parallel systems. However, the MCS lock
mandates a bring-your-own-context policy: each lock
user must provide an additional context (i.e., a queue
node) to interact with the lock. This paper proposes
MCSg, a variant of the MCS lock that relaxes this
restriction. Our key observation is that not all lock
users are created equal. We analyzed how locks are used
in massively-parallel modern systems, such as
NUMA-aware operating systems and databases. We found
that such systems often have a small number of
``regular'' code paths that enter the lock very
frequently. Such code paths are the primary beneficiary
of the high scalability of MCS locks. However, there
are also many ``guest'' code paths that infrequently
enter the lock and do not need the same degree of
fairness to access the lock (e.g., background tasks
that only run periodically with lower priority). These
guest users, which are typically spread out in various
modules of the software, prefer context-free locks,
such as ticket locks. MCSg provides these guests a
context-free interface while regular users still enjoy
the benefits provided by MCS. It can also be used as a
drop-in replacement for MCS in more advanced locks,
such as cohort locking. We also propose MCSg++, an
extended version of MCSg, which avoids guest starvation
and non-FIFO behaviors that might happen with MCSg. Our
evaluation using microbenchmarks and the TPC-C database
benchmark on a 16-socket, 240-core server shows that
both MCSg and MCSg++ preserve the benefits of MCS for
regular users while providing a context-free interface
for guests.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
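
To see what "bring-your-own-context" means, here is the classic MCS
lock in miniature: every acquire and release must be handed the
caller's own queue node, which is exactly the per-user context that
MCSg's guest interface removes. This is the standard algorithm only;
the MCSg and MCSg++ protocols are in the paper.

    #include <atomic>

    struct QNode {
        std::atomic<QNode*> next{nullptr};
        std::atomic<bool> locked{false};
    };

    struct MCSLock {
        std::atomic<QNode*> tail{nullptr};

        void acquire(QNode *me) {            // caller supplies its context
            me->next.store(nullptr, std::memory_order_relaxed);
            QNode *pred = tail.exchange(me); // join the queue
            if (pred) {                      // lock held: wait for handoff
                me->locked.store(true, std::memory_order_relaxed);
                pred->next.store(me, std::memory_order_release);
                while (me->locked.load(std::memory_order_acquire)) { /* spin locally */ }
            }
        }
        void release(QNode *me) {
            QNode *succ = me->next.load(std::memory_order_acquire);
            if (!succ) {                     // no visible successor
                QNode *expected = me;
                if (tail.compare_exchange_strong(expected, nullptr))
                    return;                  // queue empty: done
                while (!(succ = me->next.load(std::memory_order_acquire))) { }
            }
            succ->locked.store(false, std::memory_order_release);  // hand off
        }
    };
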
@Article{Chabbi:2016:CCL,
author = "Milind Chabbi and John Mellor-Crummey",
title = "Contention-conscious, locality-preserving locks",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "22:1--22:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851166",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the last decade, the growing use of
cache-coherent NUMA architectures has spurred the
development of numerous locality-preserving mutual
exclusion algorithms. NUMA-aware locks such as HCLH,
HMCS, and cohort locks exploit locality of reference
among nearby threads to deliver high lock throughput
under high contention. However, the hierarchical nature
of these locality-aware locks increases latency, which
reduces the throughput of uncontended or
lightly-contended critical sections. To date, no lock
design for NUMA systems has delivered both low latency
under low contention and high throughput under high
contention. In this paper, we describe the design and
evaluation of an adaptive mutual exclusion scheme
(AHMCS lock), which employs several orthogonal
strategies---a hierarchical MCS (HMCS) lock for high
throughput under high contention, Lamport's fast path
approach for low latency under low contention, an
adaptation mechanism that employs hysteresis to balance
latency and throughput under moderate contention, and
hardware transactional memory for lowest latency in the
absence of contention. The result is a top performing
lock that has most properties of an ideal mutual
exclusion algorithm. AHMCS exploits the strengths of
multiple contention management techniques to deliver
high performance over a broad range of contention
levels. Our empirical evaluations demonstrate the
effectiveness of AHMCS over prior art.",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Kalikar:2016:DNM,
author = "Saurabh Kalikar and Rupesh Nasre",
title = "{DomLock}: a new multi-granularity locking technique
for hierarchies",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "23:1--23:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851164",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present efficient locking mechanisms for
hierarchical data structures. Several applications work
on an abstract hierarchy of objects, and a parallel
execution on this hierarchy necessitates
synchronization across workers operating on different
parts of the hierarchy. Existing synchronization
mechanisms are either too coarse, too inefficient, or
too ad hoc, resulting in a reduced or unpredictable
amount of concurrency. We propose a new locking
approach based on the structural properties of the
underlying hierarchy. We show that the developed
techniques are efficient even when the hierarchy is an
arbitrary graph, and are applicable even when the
hierarchy involves mutation. Theoretically, we present
our approach as a locking-cost-minimizing instance of a
generic algebraic model of synchronization for
hierarchical data structures. Using STMBench7, we
illustrate considerable reduction in the locking cost,
resulting in an average throughput improvement of
42\%.",
acknowledgement = ack-nhfb,
articleno = "23",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
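
The structural idea can be illustrated with interval numbering, as a
sketch only: give each node an interval spanning the traversal
numbers of its subtree, and then two hierarchy locks conflict exactly
when their intervals overlap, regardless of how deep the locked nodes
are. The actual DomLock protocol (dominator selection, the lock
manager, handling of mutation and general graphs) is in the paper.

    #include <vector>

    struct Node {
        int low = 0, high = 0;           // interval covering the subtree
        std::vector<Node*> children;
    };

    // Post-order numbering pass; returns the next unused number.
    int number(Node *n, int next) {
        if (n->children.empty()) { n->low = n->high = next; return next + 1; }
        n->low = next;                   // first descendant's number
        for (Node *c : n->children) next = number(c, next);
        n->high = next;                  // the node's own post-order number
        return next + 1;
    }

    // Two subtree locks conflict iff their intervals overlap; an
    // ancestor's interval contains its descendants', and siblings'
    // intervals are disjoint.
    bool conflicts(const Node *a, const Node *b) {
        return a->low <= b->high && b->low <= a->high;
    }
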
@Article{Ritson:2016:BWM,
author = "Carl G. Ritson and Scott Owens",
title = "Benchmarking weak memory models",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "24:1--24:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851150",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To achieve good multi-core performance, modern
microprocessors have weak memory models, rather than
enforce sequential consistency. This gives the
programmer a wide scope for choosing exactly how to
implement various aspects of inter-thread communication
through the system's shared memory. However, these
choices come with both semantic and performance
consequences, often in tension with each other. In this
paper, we focus on the performance side, and define
techniques for evaluating the impact of various choices
in using weak memory models, such as where to put
fences, and which fences to use. We make no attempt to
judge certain strategies as best or most efficient, and
instead provide the techniques that will allow the
programmer to understand the performance implications
when identifying and resolving any semantic/performance
trade-offs. In particular, our technique supports the
reasoned selection of macrobenchmarks to use in
investigating trade-offs in using weak memory models.
We demonstrate our technique on both synthetic
benchmarks and real-world applications for the Linux
Kernel and OpenJDK Hotspot Virtual Machine on the ARMv8
and POWERv7 architectures.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Narayanaswamy:2016:VCA,
author = "Ganesh Narayanaswamy and Saurabh Joshi and Daniel
Kroening",
title = "The virtues of conflict: analysing modern
concurrency",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "25:1--25:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851165",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern shared memory multiprocessors permit reordering
of memory operations for performance reasons. These
reorderings are often a source of subtle bugs in
programs written for such architectures. Traditional
approaches to verify weak memory programs often rely on
interleaving semantics, which is prone to state space
explosion, and thus severely limits the scalability of
the analysis. In recent times, there has been a renewed
interest in modelling dynamic executions of weak memory
programs using partial orders. However, such an
approach typically requires ad-hoc mechanisms to
correctly capture the data and control-flow
choices/conflicts present in real-world programs. In
this work, we propose a novel, conflict-aware,
composable, truly concurrent semantics for programs
written using C/C++ for modern weak memory
architectures. We exploit our symbolic semantics based
on general event structures to build an efficient
decision procedure that detects assertion violations in
bounded multi-threaded programs. Using a large,
representative set of benchmarks, we show that our
conflict-aware semantics outperforms the
state-of-the-art partial-order based approaches.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Perrin:2016:CCB,
author = "Matthieu Perrin and Achour Mostefaoui and Claude
Jard",
title = "Causal consistency: beyond memory",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "26:1--26:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851170",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In distributed systems where strong consistency is
costly, if not impossible, causal consistency provides
a valuable abstraction to represent program executions
as partial orders. In addition to the sequential
program order of each computing entity, causal order
also contains the semantic links between the events
that affect the shared objects --- message emission
and reception in a communication channel, reads and
writes on a shared register. Usual approaches based on
semantic links are very difficult to adapt to other
data types such as queues or counters because they
require a specific analysis of causal dependencies for
each data type. This paper presents a new approach to
define causal consistency for any abstract data type
based on sequential specifications. It explores,
formalizes and studies the differences between three
variations of causal consistency and highlights them in
the light of PRAM, eventual consistency and sequential
consistency: weak causal consistency, which captures
the notion of causality preservation when focusing on
convergence; causal convergence, which mixes weak
causal consistency and convergence; and causal
consistency, which coincides with causal memory when
applied to shared memory.",
acknowledgement = ack-nhfb,
articleno = "26",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Chatzopoulos:2016:EES,
author = "Georgios Chatzopoulos and Aleksandar Dragojevi{\'c}
and Rachid Guerraoui",
title = "{ESTIMA}: extrapolating scalability of in-memory
applications",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "27:1--27:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851159",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents ESTIMA, an easy-to-use tool for
extrapolating the scalability of in-memory
applications. ESTIMA is designed to perform a simple,
yet important task: given the performance of an
application on a small machine with a handful of cores,
ESTIMA extrapolates its scalability to a larger machine
with more cores, while requiring minimum input from the
user. The key idea underlying ESTIMA is the use of
stalled cycles (i.e., cycles that the processor spends
waiting for various events, such as cache misses or
waiting on a lock). ESTIMA measures stalled cycles on a
few cores and extrapolates them to more cores,
estimating the amount of waiting in the system. ESTIMA
can be effectively used to predict the scalability of
in-memory applications. For instance, using
measurements of memcached and SQLite on a desktop
machine, we obtain accurate predictions of their
scalability on a server. Our extensive evaluation on a
large number of in-memory benchmarks shows that ESTIMA
has generally low prediction errors.",
acknowledgement = ack-nhfb,
articleno = "27",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
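
The idea lends itself to a tiny sketch, with the model form as an
explicit assumption (the paper's actual extrapolation differs):
measure the stalled-cycle fraction at a few thread counts, fit a
simple trend, and predict throughput at higher counts as the work
done by cycles that are not stalled.

    #include <algorithm>
    #include <cmath>
    #include <utility>
    #include <vector>

    // Hypothetical ESTIMA-style extrapolation (assumed model, not the
    // paper's): fit log(stall fraction) linearly in the thread count
    // from a few (threads, stall fraction) samples, then predict
    // throughput at p threads as p * (1 - stall(p)).
    double predict_throughput(const std::vector<std::pair<int, double>> &samples,
                              int p) {
        double sx = 0, sy = 0, sxx = 0, sxy = 0;
        for (auto [n, stall] : samples) {
            double y = std::log(stall);
            sx += n; sy += y; sxx += double(n) * n; sxy += n * y;
        }
        double k = double(samples.size());
        double b = (k * sxy - sx * sy) / (k * sxx - sx * sx);  // slope
        double a = (sy - b * sx) / k;                          // intercept
        double stall_p = std::min(1.0, std::exp(a + b * p));
        return p * (1.0 - stall_p);
    }
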
@Article{Muddukrishna:2016:GGO,
author = "Ananya Muddukrishna and Peter A. Jonsson and Artur
Podobas and Mats Brorsson",
title = "Grain graphs: {OpenMP} performance analysis made
easy",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "28:1--28:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851156",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Average programmers struggle to solve performance
problems in OpenMP programs with tasks and parallel
for-loops. Existing performance analysis tools
visualize OpenMP task performance from the runtime
system's perspective where task execution is
interleaved with other tasks in an unpredictable order.
Problems with OpenMP parallel for-loops are similarly
difficult to resolve since tools only visualize
aggregate thread-level statistics such as load
imbalance without zooming into a per-chunk granularity.
The runtime system/threads oriented visualization
provides poor support for understanding problems with
task and chunk execution time, parallelism, and memory
hierarchy utilization, forcing average programmers to
rely on experts or use tedious trial-and-error tuning
methods for performance. We present grain graphs, a new
OpenMP performance analysis method that visualizes
grains --- computation performed by a task or a
parallel for-loop chunk instance --- and highlights
problems such as low parallelism, work inflation and
poor parallelization benefit at the grain level. We
demonstrate that grain graphs can quickly reveal
performance problems that are difficult to detect and
characterize in fine detail using existing
visualizations in standard OpenMP programs, simplifying
OpenMP performance analysis. This enables average
programmers to make portable optimizations for poor
performing OpenMP programs, reducing pressure on
experts and removing the need for tedious
trial-and-error tuning.",
acknowledgement = ack-nhfb,
articleno = "28",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Machado:2016:PGC,
author = "Nuno Machado and Brandon Lucia and Lu{\'\i}s
Rodrigues",
title = "Production-guided concurrency debugging",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "29:1--29:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851149",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrency bugs that stem from schedule-dependent
branches are hard to understand and debug, because
their root causes imply not only different event
orderings, but also changes in the control-flow between
failing and non-failing executions. We present Cortex:
a system that helps expose and understand
concurrency bugs that result from schedule-dependent
branches, without relying on information from failing
executions. Cortex preemptively exposes failing
executions by perturbing the order of events and
control-flow behavior in non-failing schedules from
production runs of a program. By leveraging this
information from production runs, Cortex synthesizes
executions to guide the search for failing schedules.
Production-guided search helps cope with the large
execution search space by targeting failing executions
that are similar to observed non-failing executions.
Evaluation on popular benchmarks shows that Cortex is
able to expose failing schedules with only a few
perturbations to non-failing executions, and takes a
practical amount of time.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Farooqui:2016:AAW,
author = "Naila Farooqui and Rajkishore Barik and Brian T. Lewis
and Tatiana Shpeisman and Karsten Schwan",
title = "Affinity-aware work-stealing for integrated {CPU--GPU}
processors",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "30:1--30:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851194",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent integrated CPU-GPU processors like Intel's
Broadwell and AMD's Kaveri support hardware CPU-GPU
shared virtual memory, atomic operations, and memory
coherency. This enables fine-grained CPU-GPU
work-stealing, but architectural differences between
the CPU and GPU hurt the performance of
traditionally-implemented work-stealing on such
processors. These architectural differences include
different clock frequencies, atomic operation costs,
and cache and shared memory latencies. This paper
describes a preliminary implementation of our
work-stealing scheduler, Libra, which includes
techniques to deal with these architectural differences
in integrated CPU-GPU processors. Libra's
affinity-aware techniques achieve significant
performance gains over classically-implemented
work-stealing. We show preliminary results using a
diverse set of nine regular and irregular workloads
running on an Intel Broadwell Core-M processor. Libra
currently achieves up to a 2$ \times $ performance
improvement over classical work-stealing, with a 20\%
average improvement.",
acknowledgement = ack-nhfb,
articleno = "30",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Gindraud:2016:ICM,
author = "Fran{\c{c}}ois Gindraud and Fabrice Rastello and
Albert Cohen and Fran{\c{c}}ois Broquedis",
title = "An interval constrained memory allocator for the
{Givy} {GAS} runtime",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "31:1--31:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851195",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The shared memory model helps parallel programming
productivity, but it also has a high hardware cost and
imposes scalability constraints. Ultimately, higher
performance will require distributed memories, which
scale better but force programmers to manually transfer
data between local memories, a complex task.
Distributed memories are also more energy efficient
than shared memories, and are used in a family of
embedded computing solutions called multiprocessor
systems-on-chip (MPSoC).",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Chang:2016:PSF,
author = "Li-Wen Chang and Izzat {El Hajj} and Hee-Seok Kim and
Juan G{\'o}mez-Luna and Abdul Dakkak and Wen-mei Hwu",
title = "A programming system for future proofing performance
critical libraries",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "32:1--32:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851178",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Tangram, a programming system for writing
performance-portable programs. The language enables
programmers to write computation and composition
codelets, supported by tuning knobs and primitives for
expressing data parallelism and work decomposition. The
compiler and runtime use a set of techniques such as
hierarchical composition, coarsening, data placement,
tuning, and runtime selection based on input
characteristics and micro-profiling. The resulting
performance is competitive with optimized vendor
libraries.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Nielsen:2016:SLF,
author = "Jesper Puge Nielsen and Sven Karlsson",
title = "A scalable lock-free hash table with open addressing",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "33:1--33:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851196",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent data structures synchronized with locks do
not scale well with the number of threads. As more
scalable alternatives, concurrent data structures and
algorithms based on widely available, however advanced,
atomic operations have been proposed. These data
structures allow for correct and concurrent operations
without any locks. In this paper, we present a new
fully lock-free open addressed hash table with a
simpler design than prior published work. We split hash
table insertions into two atomic phases: first
inserting a value while ignoring other concurrent
operations, then, in a second phase, resolving any
duplicate or conflicting values. Our hash table has a
constant and low memory usage that is less than that of
existing lock-free hash tables at a fill level of 33\%
and above. The hash
table exhibits good cache locality. Compared to prior
art, our hash table results in 16\% and 15\% fewer L1
and L2 cache misses respectively, leading to 21\% fewer
memory stall cycles. Our experiments show that our hash
table scales close to linearly with the number of
threads and outperforms, in throughput, other lock-free
hash tables by 19\%.",
acknowledgement = ack-nhfb,
articleno = "33",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
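
A sketch of what the first phase can look like under assumed
simplifications (fixed power-of-two capacity, nonzero integer keys,
no deletions): claim the first empty slot on the probe path with a
CAS while ignoring concurrent inserts. Note the race this
deliberately leaves behind, two threads can install the same key into
different slots, and resolving such duplicates is the job of the
paper's second phase, which is not reproduced here.

    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <functional>

    constexpr std::size_t SIZE = 1 << 16;  // power of two: cheap masking
    constexpr uint64_t EMPTY = 0;          // assumed: keys are nonzero

    std::atomic<uint64_t> table[SIZE];     // zero-initialized = all EMPTY

    bool insert_phase_one(uint64_t key) {
        std::size_t h = std::hash<uint64_t>{}(key) & (SIZE - 1);
        for (std::size_t i = 0; i < SIZE; ++i) {  // linear probing
            std::size_t slot = (h + i) & (SIZE - 1);
            uint64_t cur = table[slot].load(std::memory_order_acquire);
            if (cur == key) return false;          // already present
            if (cur == EMPTY) {
                uint64_t expected = EMPTY;
                if (table[slot].compare_exchange_strong(expected, key))
                    return true;                   // claimed a slot
                if (expected == key) return false; // lost race to same key
            }
        }
        return false;                              // table full
    }
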
@Article{Maier:2016:CHT,
author = "Tobias Maier and Peter Sanders and Roman Dementiev",
title = "Concurrent hash tables: fast and general(?)!",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "34:1--34:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851188",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent hash tables are one of the most important
concurrent data structures with numerous applications.
Since hash table accesses can dominate the execution
time of the overall application, we need
implementations that achieve good speedup.
Unfortunately, currently available concurrent hashing
libraries turn out to be far from this requirement, in
particular when contention on some elements occurs.
Our starting point for better performing data
structures is a fast and simple lock-free concurrent
hash table based on linear probing that is limited to
word-sized key-value types and does not support dynamic
size adaptation. We explain how to lift these
limitations in a provably scalable way and demonstrate
that dynamic growing has a performance overhead
comparable to the same generalization in sequential
hash tables. We perform extensive experiments comparing
the performance of our implementations with six of the
most widely used concurrent hash tables. Ours are
considerably faster than the best algorithms with
similar restrictions and an order of magnitude faster
than the best more general tables. In some extreme
cases, the difference even approaches four orders of
magnitude.",
acknowledgement = ack-nhfb,
articleno = "34",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Prades:2016:CAX,
author = "Javier Prades and Carlos Rea{\~n}o and Federico
Silla",
title = "{CUDA} acceleration for {Xen} virtual machines in
{InfiniBand} clusters with {rCUDA}",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "35:1--35:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851181",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many data centers currently use virtual machines (VMs)
to achieve a more efficient usage of hardware
resources. However, current virtualization solutions,
such as Xen, do not easily provide graphics processing
unit (GPU) accelerators to applications running in the
virtualized domain with the flexibility usually
required in data centers (i.e., managing virtual GPU
instances and concurrently sharing them among several
VMs). Remote GPU virtualization frameworks such as the
rCUDA solution may address this problem. In this work
we analyze the use of the rCUDA framework to accelerate
scientific applications running inside Xen VMs. Results
show that the use of the rCUDA framework is a feasible
approach, featuring a very low overhead if an
InfiniBand fabric is already present in the cluster.",
acknowledgement = ack-nhfb,
articleno = "35",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Umar:2016:EPF,
author = "Ibrahim Umar and Otto J. Anshus and Phuong H. Ha",
title = "Effect of portable fine-grained locality on energy
efficiency and performance in concurrent search trees",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "36:1--36:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851186",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent research has suggested that improving
fine-grained data-locality is one of the main
approaches to improving energy efficiency and
performance. However, no previous research has
investigated the effect of the approach on these
metrics in the case of concurrent data structures.
This paper investigates how fine-grained data locality
influences energy efficiency and performance in
concurrent search trees, a crucial data structure that
is widely used in several important systems. We conduct
a set of experiments on three lock-based concurrent
search trees: DeltaTree, a portable fine-grained
locality-aware concurrent search tree; CBTree, a
coarse-grained locality-aware B+tree; and BST-TK, a
locality-oblivious concurrent search tree. We run the
experiments on a commodity x86 platform and an embedded
ARM platform. The experimental results show that
DeltaTree has 13--25\% better energy efficiency and
10--22\% more operations/second on the x86 and ARM
platforms, respectively. The results confirm that
portable fine-grained locality can improve energy
efficiency and performance in concurrent search
trees.",
acknowledgement = ack-nhfb,
articleno = "36",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Parikh:2016:EDW,
author = "Hrushit Parikh and Vinit Deodhar and Ada Gavrilovska
and Santosh Pande",
title = "Efficient distributed workstealing via matchmaking",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "37:1--37:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851175",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many classes of high-performance applications and
combinatorial problems exhibit a large degree of
runtime load variability. One approach to achieving
balanced resource use is to over-decompose the problem into
fine-grained tasks that are then dynamically balanced
using approaches such as workstealing. Existing work
stealing techniques for such irregular applications,
running on large clusters, exhibit high overheads due
to potential untimely interruption of busy nodes,
excessive communication messages and delays experienced
by idle nodes in finding work due to repeated failed
steals. We contend that the fundamental problem of
distributed work-stealing is that of rapidly bringing
together work producers and consumers. In response, we
develop an algorithm that performs timely, lightweight
and highly efficient matchmaking between work producers
and consumers, which results in accurate load balance.
Experimental evaluations show that our scheduler is
able to outperform other distributed work stealing
schedulers, and to achieve scale beyond what is
possible with current approaches.",
acknowledgement = ack-nhfb,
articleno = "37",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Luo:2016:DCC,
author = "Hao Luo and Guoyang Chen and Pengcheng Li and Chen
Ding and Xipeng Shen",
title = "Data-centric combinatorial optimization of parallel
code",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "38:1--38:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851182",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Memory performance is one essential factor for tapping
into the full potential of the massive parallelism of
GPU. It has motivated some recent efforts in GPU cache
modeling. This paper presents a new data-centric way to
model the performance of a system with heterogeneous
memory resources. The new model is composable, meaning
it can predict the performance difference due to
placing data differently by profiling the execution
just once.",
acknowledgement = ack-nhfb,
articleno = "38",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Maleki:2016:DSD,
author = "Saeed Maleki and Donald Nguyen and Andrew Lenharth and
Mar{\'\i}a Garzar{\'a}n and David Padua and Keshav
Pingali",
title = "{DSMR}: a shared and distributed memory algorithm for
single-source shortest path problem",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "39:1--39:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851183",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Single-Source Shortest Path (SSSP) problem is to
find the shortest paths from a source vertex to all
other vertices in a graph. In this paper, we introduce
the Dijkstra Strip-Mined Relaxation (DSMR) algorithm,
an efficient parallel SSSP algorithm for shared and
distributed memory systems. Our results show that DSMR
is faster than parallel $\Delta$-stepping by a factor of
up to 1.66.
acknowledgement = ack-nhfb,
articleno = "39",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Salucci:2016:GMC,
author = "Luca Salucci and Daniele Bonetta and Stefan Marr and
Walter Binder",
title = "Generic messages: capability-based shared memory
parallelism for event-loop systems",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "40:1--40:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851184",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Systems based on event-loops have been popularized by
Node.JS, and are becoming a key technology in the
domain of cloud computing. Despite their popularity,
such systems support only share-nothing parallelism via
message passing between parallel entities usually
called workers. In this paper, we introduce a novel
parallel programming abstraction called Generic
Messages (GEMs), which enables shared-memory
parallelism for share-nothing event-based systems. A
key characteristic of GEMs is that they enable workers
to share state by specifying how the state can be
accessed once it is shared. We call this aspect of the
GEMs model capability-based parallelism.",
acknowledgement = ack-nhfb,
articleno = "40",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Liu:2016:HCG,
author = "Jianqiao Liu and Nikhil Hegde and Milind Kulkarni",
title = "Hybrid {CPU--GPU} scheduling and execution of tree
traversals",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "41:1--41:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851174",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "GPUs offer the promise of massive, power-efficient
parallelism. However, exploiting this parallelism
requires code to be carefully structured to deal with
the limitations of the SIMT execution model. In recent
years, there has been much interest in mapping
irregular applications to GPUs: applications with
unpredictable, data-dependent behaviors. While most of
the work in this space has focused on ad hoc
implementations of specific algorithms, recent work has
looked at generic techniques for mapping a large class
of tree traversal algorithms to GPUs, through careful
restructuring of the tree traversal algorithms to make
them behave more regularly. Unfortunately, even this
general approach for GPU execution of tree traversal
algorithms is reliant on ad hoc, handwritten,
algorithm-specific scheduling (i.e., assignment of
threads to warps) to achieve high performance. The key
challenge of scheduling is that it is a highly
irregular process that requires the inspection of
thread behavior and then careful sorting of the threads
into warps. In this paper, we present a novel
scheduling and execution technique for tree traversal
algorithms that is both general and automatic. The key
novelty is a hybrid approach: the GPU partially
executes tasks to inspect thread behavior and transmits
information back to the CPU, which uses that
information to perform the scheduling itself, before
executing the remaining, carefully scheduled, portion
of the traversals on the GPU. We applied this framework
to five tree traversal algorithms, achieving
significant speedups over optimized GPU code that does
not perform application-specific scheduling. Further,
we show that in many cases, our hybrid approach is able
to deliver better performance than even GPU code that
uses hand-tuned, application-specific scheduling.",
acknowledgement = ack-nhfb,
articleno = "41",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Ramachandran:2016:IEI,
author = "Arunmoezhi Ramachandran and Neeraj Mittal",
title = "Improving efficacy of internal binary search trees
using local recovery",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "42:1--42:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851173",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The Binary Search Tree (BST) is an important data
structure for managing ordered data. Many
algorithms---blocking as well as non-blocking---have
been proposed for concurrent manipulation of a binary
search tree in an asynchronous shared memory system
that supports search, insert and delete operations
based on both external and internal representations of
a search tree. An important step in executing an
operation on a tree is to traverse the tree from
top-to-down in order to locate the operation's window.
A process may need to perform this traversal several
times to handle any failures occurring due to other
processes performing conflicting actions on the tree.
Most concurrent algorithms that have been proposed so far
use a na{\"\i}ve approach and simply restart the
traversal from the root of the tree. In this work, we
present a new approach to recover from such failures
more efficiently in a concurrent binary search tree
based on internal representation using local recovery
by restarting the traversal from the ``middle'' of the
tree in order to locate an operation's window. Our
approach is sufficiently general in the sense that it
can be applied to a variety of concurrent binary search
trees based on both blocking and non-blocking
approaches. Using experimental evaluation, we
demonstrate that our local recovery approach can yield
significant speed-ups of up to 69\% for many concurrent
algorithms.",
acknowledgement = ack-nhfb,
articleno = "42",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Merrill:2016:MBS,
author = "Duane Merrill and Michael Garland",
title = "Merge-based sparse matrix-vector multiplication
{(SpMV)} using the {CSR} storage format",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "43:1--43:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851190",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a perfectly balanced, ``merge-based''
parallel method for computing sparse matrix-vector
products (SpMV). Our algorithm operates directly upon
the Compressed Sparse Row (CSR) sparse matrix format, a
predominant in-memory representation for
general-purpose sparse linear algebra computations. Our
CsrMV performs an equitable multi-partitioning of the
input dataset, ensuring that no single thread can be
overwhelmed by assignment to (a) arbitrarily-long rows
or (b) an arbitrarily-large number of zero-length rows.
This parallel decomposition requires neither offline
preprocessing nor specialized/ancillary data formats.
We evaluate our method on both CPU and GPU
microarchitecture across an enormous corpus of diverse
real world matrix datasets. We show that traditional
CsrMV methods are inconsistent performers subject to
order-of-magnitude slowdowns, whereas the performance
response of our method is substantially impervious to
row-length heterogeneity.",
acknowledgement = ack-nhfb,
articleno = "43",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
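
The heart of the method is the merge-path split: conceptually merge
the CSR row end-offsets with the natural numbers 0..nnz-1, and give
every thread an equal slice of that combined sequence, so both very
long rows and long runs of empty rows are divided evenly. Below is a
sketch of the coordinate search along one diagonal of that conceptual
merge; the CPU and GPU kernels built on top of it are in the paper.

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct Coord { std::size_t row, nz; };  // (rows, nonzeros) consumed

    // Where does diagonal d of the merge of row_end (CSR row end-offsets)
    // with 0..nnz-1 cross? Thread t starts its share at
    // merge_path_search(t * (m + nnz) / num_threads, row_end, nnz).
    Coord merge_path_search(std::size_t d,
                            const std::vector<std::size_t> &row_end,
                            std::size_t nnz) {
        std::size_t lo = d > nnz ? d - nnz : 0;
        std::size_t hi = std::min(d, row_end.size());
        while (lo < hi) {                   // binary search on the diagonal
            std::size_t mid = (lo + hi) / 2;
            if (row_end[mid] <= d - 1 - mid) lo = mid + 1;
            else hi = mid;
        }
        return {lo, d - lo};
    }

Between its start and end coordinates, a thread alternates two moves:
consuming a nonzero accumulates into the running dot product of the
current row, and consuming a row end-offset writes that row's result
out; rows split across threads are combined with a small fix-up pass.
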
@Article{Drebes:2016:NAS,
author = "Andi Drebes and Antoniu Pop and Karine Heydemann and
Nathalie Drach and Albert Cohen",
title = "{NUMA}-aware scheduling and memory allocation for
data-flow task-parallel applications",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "44:1--44:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851193",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic task parallelism is a popular programming
model on shared-memory systems. Compared to data
parallel loop-based concurrency, it promises enhanced
scalability, load balancing and locality. These
promises, however, are undermined by non-uniform memory
access (NUMA) systems. We show that it is possible to
preserve the uniform hardware abstraction of
contemporary task-parallel programming models, for both
computing and memory resources, while achieving
near-optimal data locality. Our run-time algorithms for
NUMA-aware task and data placement are fully automatic,
application-independent, performance-portable across
NUMA machines, and adapt to dynamic changes. Placement
decisions use information about inter-task data
dependences and reuse. This information is readily
available in the run-time systems of modern
task-parallel programming frameworks, and from the
operating system regarding the placement of previously
allocated memory. Our algorithms take advantage of
data-flow style task parallelism, where the
privatization of task data enhances scalability through
the elimination of false dependences and enables
fine-grained dynamic control over the placement of
application data. We demonstrate that the benefits of
dynamically managing data placement outweigh the
privatization cost, even when comparing with
target-specific optimizations through static,
NUMA-aware data interleaving. Our implementation and
the experimental evaluation on a set of
high-performance benchmarks executing on a 192-core
system with 24 NUMA nodes show that the fraction of
local memory accesses can be increased to more than
99\%, resulting in a speedup of up to 5$ \times $
compared to a NUMA-aware hierarchical work-stealing
baseline.",
acknowledgement = ack-nhfb,
articleno = "44",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Mohamedin:2016:DNA,
author = "Mohamed Mohamedin and Roberto Palmieri and Sebastiano
Peluso and Binoy Ravindran",
title = "On designing {NUMA}-aware concurrency control for
scalable transactional memory",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "45:1--45:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851189",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "NUMA architectures posed the challenge of rethinking
parallel applications due to the non-homogeneity
introduced by their design, and their real benefits
depend on the characteristics of the particular
workload. We call partitionable transactional workloads
those workloads that can exploit the distributed nature
of NUMA, such as transactional workloads where data and
accesses can be easily partitioned among the so-called
NUMA zones. However, when those workloads require
synchronization on shared data, we face the issue of
exploiting the NUMA architecture in the concurrency
control for their transactions as well. Therefore, in
this paper we present a NUMA-aware concurrency control
for transactional memory, designed to promote
scalability in scenarios where the transactional
workload is prone to scale and the characteristics of
the underlying memory model are inherently non-uniform,
as on NUMA architectures.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Saad:2016:OTC,
author = "Mohamed M. Saad and Roberto Palmieri and Binoy
Ravindran",
title = "On ordering transaction commit",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "46:1--46:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851191",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this poster paper, we briefly introduce an
effective solution to address the problem of committing
                 transactions while enforcing a predefined order. To do that,
we overview the design of two algorithms that deploy a
cooperative transaction execution that circumvents the
transaction isolation constraint in favor of
propagating written values among conflicting
transactions. A preliminary implementation shows that
even in the presence of data conflicts, the proposed
                 algorithms significantly outperform competing
                 approaches.",
acknowledgement = ack-nhfb,
articleno = "46",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
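
The ordering problem above can be made concrete with the simplest
(fully serializing) scheme: each transaction spins on a shared ticket
until it is its turn to commit. The paper's cooperative value
propagation avoids exactly this stall; the C11 sketch below shows only
the baseline idea, with illustrative names:

#include <sched.h>
#include <stdatomic.h>

/* Global commit ticket: the order number allowed to commit next. */
static atomic_ulong next_commit;

/* Block transaction number `order` until its turn, run its commit
 * logic, then pass the turn on. commit_fn/arg are illustrative
 * parameters standing in for the TM runtime's commit step. */
void ordered_commit(unsigned long order, void (*commit_fn)(void *), void *arg)
{
    while (atomic_load_explicit(&next_commit, memory_order_acquire) != order)
        sched_yield();               /* the naive stall the paper removes */
    commit_fn(arg);
    atomic_store_explicit(&next_commit, order + 1, memory_order_release);
}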
@Article{Qian:2016:ODG,
author = "Xuehai Qian and Koushik Sen and Paul Hargrove and
Costin Iancu",
title = "{OPR}: deterministic group replay for one-sided
communication",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "47:1--47:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851179",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The ability to reproduce a parallel execution is
desirable for debugging and program reliability
purposes. In debugging (13), the programmer needs to
manually step back in time, while for resilience (6)
this is automatically performed by the application upon
failure. To be useful, replay has to faithfully
reproduce the original execution. For parallel programs
the main challenge is inferring and maintaining the
order of conflicting operations (data races).
Deterministic record and replay (R{\&}R) techniques
have been developed for multithreaded shared memory
programs (5), as well as distributed memory programs
                 (14). Our main interest is techniques for large-scale
                 scientific programming models (3; 4).",
acknowledgement = ack-nhfb,
articleno = "47",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Rabozzi:2016:PAP,
author = "Marco Rabozzi and Matteo Mazzucchelli and Roberto
Cordone and Giovanni Matteo Fumarola and Marco D.
Santambrogio",
title = "Preemption-aware planning on big-data systems",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "48:1--48:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851187",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent developments in Big Data frameworks are moving
towards reservation based approaches as a mean to
manage the increasingly complex mix of computations,
whereas preemption techniques are employed to meet
strict jobs deadlines. Within this work we propose and
evaluate a new planning algorithm in the context of
reservation based scheduling. Our approach is able to
achieve high cluster utilization while minimizing the
need for preemption that causes system overheads and
planning mispredictions.",
acknowledgement = ack-nhfb,
articleno = "48",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Chen:2016:SPN,
author = "Yifeng Chen and Kun Huang and Bei Wang and Guohui Li
and Xiang Cui",
title = "{Samsara Parallel}: a non-{BSP} parallel-in-time
model",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "49:1--49:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851185",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many time-dependent problems like molecular dynamics
of protein folding require a large number of time
                 steps. The latencies and overheads of general-purpose
                 clusters with accelerators are too large for
                 high-frequency iteration. We introduce an algorithmic
model called Samsara Parallel (or SP) which, unlike
BSP, relies on asynchronous communications and can
repeatedly return to earlier time steps to refine the
precision of computation. This also extends a line of
research called Parallel-in-Time in computational
chemistry and physics.",
acknowledgement = ack-nhfb,
articleno = "49",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Zhang:2016:SAN,
author = "Mingzhe Zhang and Francis C. M. Lau and Cho-Li Wang
and Luwei Cheng and Haibo Chen",
title = "Scalable adaptive {NUMA}-aware lock: combining local
locking and remote locking for efficient concurrency",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "50:1--50:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851176",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scalable locking is a key building block for scalable
multi-threaded software. Its performance is especially
critical in multi-socket, multi-core machines with
non-uniform memory access (NUMA). Previous schemes such
as local locking and remote locking only perform well
under a certain level of contention, and often require
non-trivial tuning for a particular configuration.
                 Moreover, on large NUMA systems, current
                 distance-first NUMA policies perform unsatisfactorily
                 because lock-server nomination is unmanaged. In this work,
we propose SANL, a locking scheme that can deliver high
performance under various contention levels by
adaptively switching between the local and the remote
lock scheme. Furthermore, we introduce a new NUMA
policy for the remote lock that jointly considers node
distances and server utilization when choosing lock
servers. A comparison with seven representative locking
schemes shows that SANL outperforms the others in most
contention situations. In one group test, SANL is 3.7
times faster than RCL lock and 17 times faster than
POSIX mutex.",
acknowledgement = ack-nhfb,
articleno = "50",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
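
SANL's key move is switching between a local locking scheme and a
remote one as contention changes. As a loose analogue only (not the
SANL algorithm, and with arbitrary thresholds), the C11 sketch below
spins briefly on a test-and-set flag and, under sustained contention,
funnels waiters through a mutex so that only one contended thread at a
time spins on the flag:

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>

typedef struct {
    atomic_flag held;        /* the lock word ("local" test-and-set path);
                                init with ATOMIC_FLAG_INIT */
    pthread_mutex_t waiters; /* queues contended threads ("remote"-style path);
                                init with PTHREAD_MUTEX_INITIALIZER */
    atomic_int contention;   /* crude running estimate of contention */
} adaptive_lock_t;

#define SPIN_TRIES 64
#define HIGH_CONTENTION 8

void adaptive_lock(adaptive_lock_t *l)
{
    if (atomic_load(&l->contention) < HIGH_CONTENTION) {
        for (int i = 0; i < SPIN_TRIES; i++)
            if (!atomic_flag_test_and_set_explicit(&l->held,
                                                   memory_order_acquire))
                return;                  /* fast local path won */
        atomic_fetch_add(&l->contention, 1);
    }
    /* Contended path: the mutex serializes waiters; the flag remains the
     * single source of truth, so both paths exclude each other. */
    pthread_mutex_lock(&l->waiters);
    while (atomic_flag_test_and_set_explicit(&l->held, memory_order_acquire))
        sched_yield();
    pthread_mutex_unlock(&l->waiters);
    if (atomic_load(&l->contention) > 0)
        atomic_fetch_sub(&l->contention, 1);   /* decay the estimate */
}

void adaptive_unlock(adaptive_lock_t *l)
{
    atomic_flag_clear_explicit(&l->held, memory_order_release);
}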
@Article{Hegde:2016:SRS,
author = "Nikhil Hegde and Jianqiao Liu and Milind Kulkarni",
title = "{SPIRIT}: a runtime system for distributed irregular
tree applications",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "51:1--51:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851177",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Repeated, depth-first traversal of trees is a common
algorithmic pattern in an important set of applications
from diverse domains such as cosmological simulations,
data mining, and computer graphics. As these
applications operate over massive data sets, it is
often necessary to distribute the trees to process all
of the data. In this work, we introduce SPIRIT, a
runtime system to ease the writing of distributed tree
applications. SPIRIT automates the challenging tasks of
tree distribution, optimizing communication and
parallelizing independent computations. The common
algorithmic pattern in tree traversals is exploited to
effectively schedule parallel computations and improve
locality. As a result, pipeline parallelism in
                 distributed traversals is identified and complemented
                 by load-balancing and locality-enhancing
                 message-aggregation optimizations. Evaluation of
                 SPIRIT on tree traversal in Point Correlation (PC)
                 shows a scalable system, achieving speedups of up to
                 38x on a 16-node, 64-process system compared to a
                 1-node
baseline configuration. We also find that SPIRIT
results in substantially less communication and
achieves significant performance improvements over
implementations in other distributed graph systems.",
acknowledgement = ack-nhfb,
articleno = "51",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Ramalhete:2016:TME,
author = "Pedro Ramalhete and Andreia Correia",
title = "{Tidex}: a mutual exclusion lock",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "52:1--52:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851171",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Several basic mutual exclusion lock algorithms are
known, with one of the simplest being the Ticket Lock.
We present a new mutual exclusion lock with properties
similar to the Ticket Lock but using atomic_exchange()
instead of atomic_fetch_add() that can be more
efficient on systems without a native instruction for
atomic_fetch_add(), or in which the native instruction
for atomic_exchange() is faster than the one for
atomic_fetch_add(). Similarly to the Ticket Lock, our
                 lock has a small memory footprint, is extremely simple,
respects FIFO order, and provides starvation freedom in
architectures that implement atomic_exchange() as a
single instruction, like x86.",
acknowledgement = ack-nhfb,
articleno = "52",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
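
For reference, the Ticket Lock that the abstract above uses as its
baseline is a few lines of C11 (this is the classic algorithm, not the
authors' exchange-based variant):

#include <sched.h>
#include <stdatomic.h>

typedef struct {
    atomic_ulong ingress;   /* next ticket to hand out (atomic_fetch_add) */
    atomic_ulong egress;    /* ticket currently allowed into the section */
} ticket_lock_t;

void ticket_lock(ticket_lock_t *l)
{
    unsigned long my =
        atomic_fetch_add_explicit(&l->ingress, 1, memory_order_relaxed);
    while (atomic_load_explicit(&l->egress, memory_order_acquire) != my)
        sched_yield();      /* FIFO: waiters are served in ticket order */
}

void ticket_unlock(ticket_lock_t *l)
{
    atomic_fetch_add_explicit(&l->egress, 1, memory_order_release);
}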
@Article{Mastoras:2016:UFC,
author = "Aristeidis Mastoras and Thomas R. Gross",
title = "Unifying fixed code and fixed data mapping of
load-imbalanced pipelined loops",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "53:1--53:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851172",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Some loops with cross-iteration dependences can
execute in parallel by pipelining. The loop body is
partitioned into stages such that the data dependences
are not violated and then the stages are mapped onto
threads. Two well-known mapping techniques are fixed
code and fixed data; they achieve high performance for
load-balanced loops, but they fail to perform well for
load-imbalanced loops. In this article, we present a
novel hybrid mapping that eliminates drawbacks of both
prior mapping techniques and enables dynamic scheduling
of stages.",
acknowledgement = ack-nhfb,
articleno = "53",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Kurt:2016:UAS,
author = "Mehmet Can Kurt and Bin Ren and Sriram Krishnamoorthy
and Gagan Agrawal",
title = "User-assisted storage reuse determination for dynamic
task graphs",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "54:1--54:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851180",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Models based on task graphs that operate on
single-assignment data are attractive in several ways,
but also require nuanced algorithms for scheduling and
memory management for efficient execution. In this
paper, we consider memory-efficient dynamic scheduling
of task graphs, and present a novel approach for
dynamically recycling the memory locations assigned to
data items as they are produced by tasks.",
acknowledgement = ack-nhfb,
articleno = "54",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
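
The recycling described above generalizes a common mechanism: a
per-item consumer count, fixed when the task graph is built, whose
last decrement frees the single-assignment storage. A hedged C11
sketch (structures and policy are illustrative, not the paper's
scheme):

#include <stdatomic.h>
#include <stdlib.h>

typedef struct {
    void *storage;               /* the single-assignment payload */
    atomic_int consumers_left;   /* tasks that still have to read it */
} data_item_t;

/* n_consumers is known statically from the task graph's out-edges. */
data_item_t *item_create(size_t bytes, int n_consumers)
{
    data_item_t *d = malloc(sizeof *d);
    if (!d || !(d->storage = malloc(bytes))) {
        free(d);
        return NULL;
    }
    atomic_init(&d->consumers_left, n_consumers);
    return d;
}

/* Each consumer calls this after its last read; the final caller
 * recycles the storage immediately rather than waiting for a GC pass. */
void item_release(data_item_t *d)
{
    if (atomic_fetch_sub_explicit(&d->consumers_left, 1,
                                  memory_order_acq_rel) == 1) {
        free(d->storage);
        free(d);
    }
}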
@Article{Rehman:2016:VMJ,
author = "Waqas Ur Rehman and Muhammad Sohaib Ayub and Junaid
Haroon Siddiqui",
title = "Verification of {MPI} {Java} programs using software
model checking",
journal = j-SIGPLAN,
volume = "51",
number = "8",
pages = "55:1--55:??",
month = aug,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3016078.2851192",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Development of concurrent software requires the
programmer to be aware of non-determinism, data races,
and deadlocks. MPI (message passing interface) is a
popular standard for writing message oriented
distributed applications. Some messages in MPI systems
can be processed by one of the many machines and in
many possible orders. This non-determinism can affect
the result of an MPI application. The alternate results
may or may not be correct. To verify MPI applications,
we need to check all these possible orderings and use
an application specific oracle to decide if these
orderings give correct output. MPJ Express is an open
source Java implementation of the MPI standard. We
developed a Java based model of MPJ Express, where
processes are modeled as threads, and which can run
unmodified MPI Java programs on a single system. This
enabled us to adapt the Java PathFinder explicit state
software model checker (JPF) using a custom listener to
verify our model running real MPI Java programs. We
evaluated our approach using small examples where model
checking revealed message orders that would result in
incorrect system behavior.",
acknowledgement = ack-nhfb,
articleno = "55",
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '16 conference proceedings.",
}
@Article{Sarkar:2016:VEC,
author = "Vivek Sarkar",
title = "Virtualizing the Edge of the Cloud: the New Frontier",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "1--1",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892243",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the last two decades, virtualization technologies
                 have turned datacenter infrastructure into a
                 multitenant, dynamically provisionable, elastic
                 resource, and formed
the basis for the wide adoption of cloud computing.
Many of today's cloud applications, however, are based
on continuous interactions with end users and their
devices, and the trend is only expected to intensify
with the expansion of the Internet of Things. The
consequent bandwidth and latency requirements of these
emerging workloads push the cloud boundary outside of
traditional datacenters, giving rise to an edge tier in
the end-device-to-cloud-backend infrastructure.
Computational resources embedded in anything from
standalone microservers to WiFi routers and small cell
access points, and their open APIs, present
opportunities for deploying application logic and state
closer to where it is being used, addressing both
latency and backhaul bandwidth problems. This talk will
look at the role that existing virtualization
                 technologies can play in providing this edge tier
                 with the required flexibility, dynamic provisioning,
                 and isolation, and will outline open problems that
                 require
development of new solutions. We will also discuss the
opportunities to leverage these technologies to further
deal with the diversity in the end-user device and IoT
space.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Huang:2016:BKB,
author = "Yu-Ju Huang and Hsuan-Heng Wu and Yeh-Ching Chung and
Wei-Chung Hsu",
title = "Building a {KVM}-based Hypervisor for a Heterogeneous
System Architecture Compliant System",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "3--15",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892246",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Heterogeneous System Architecture (HSA) is an
architecture developed by the HSA foundation aiming at
reducing programmability barriers as well as improving
communication efficiency for heterogeneous computing.
For example, HSA allows heterogeneous computing
devices to share the same virtual address space. This
feature allows programmers to bypass explicit data
copying between devices, as was required in the past.
HSA features such as job dispatching through user level
queues and memory based signaling help to reduce
communication latency between the host and other
computing devices. While the new features in HSA enable
more efficient heterogeneous computing, they also
introduce new challenges to system virtualization,
especially in memory virtualization and I/O
virtualization. This work investigates the issues
involved in HSA virtualization and implements a
KVM-based hypervisor that supports the main features of
HSA inside guest operating systems. Furthermore, this
work shows that with the newly introduced hypervisor
for HSA, system resources in HSA-compliant AMD Kaveri
can be effectively shared between multiple guest
operating systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Ouyang:2016:SUV,
author = "Jiannan Ouyang and John R. Lange and Haoqiang Zheng",
title = "{Shoot4U}: Using {VMM} Assists to Optimize {TLB}
Operations on Preempted {vCPUs}",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "17--23",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892245",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Virtual Machine based approaches to workload
consolidation, as seen in IaaS cloud as well as
datacenter platforms, have long had to contend with
performance degradation caused by synchronization
primitives inside the guest environments. These
primitives can be affected by virtual CPU preemptions
by the host scheduler that can introduce delays that
are orders of magnitude longer than those primitives
were designed for. While a significant amount of work
has focused on the behavior of spinlock primitives as a
source of these performance issues, spinlocks do not
represent the entirety of synchronization mechanisms
that are susceptible to scheduling issues when running
in a virtualized environment. In this paper we address
the virtualized performance issues introduced by TLB
shootdown operations. Our profiling study, based on the
PARSEC benchmark suite, has shown that up to 64\% of a
VM's CPU time can be spent on TLB shootdown operations
under certain workloads. In order to address this
problem, we present a paravirtual TLB shootdown scheme
named Shoot4U. Shoot4U completely eliminates TLB
shootdown preemptions by invalidating guest TLB entries
from the VMM and allowing guest TLB shootdown
operations to complete without waiting for remote
virtual CPUs to be scheduled. Our performance
evaluation using the PARSEC benchmark suite
demonstrates that Shoot4U can reduce benchmark runtime
                 by up to 85\% compared to an unmodified Linux kernel, and
up to 44\% over a state-of-the-art paravirtual TLB
shootdown scheme.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Merrifield:2016:PIE,
author = "Timothy Merrifield and H. Reza Taheri",
title = "Performance Implications of Extended Page Tables on
Virtualized x86 Processors",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "25--35",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892258",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Managing virtual memory is an expensive operation, and
becomes even more expensive on virtualized servers.
Processing TLB misses on a virtualized x86 server
requires a two-dimensional page walk that can have 6x
more page table lookups, hence 6x more memory
references, than a native page table walk. Thus much of
the recent research on the subject starts from the
assumption that TLB miss processing in virtual
environments is significantly more expensive than on
native servers. However, we will show that with the
latest software stack on modern x86 processors, most of
these page-table lookups are satisfied by internal
paging structure caches and the L1/L2 data caches, and
the actual virtualization overhead of TLB miss
processing is a modest fraction of the overall time
spent processing TLB misses. In this paper, we present
a detailed accounting of the TLB miss processing costs
on virtualized x86 servers for an exhaustive set of
workloads, in particular, two very demanding industry
standard workloads. We show that an implementation of
the TPC-C workload that actively uses 475 GB of memory
on a 72-CPU Haswell-EP server spends 20\% of its time
processing TLB misses when the application runs in a
VM. Although this is a non-trivial amount, it is only
4.2\% higher than the TLB miss processing costs on bare
metal. The multi-VM VMmark benchmark sees 12.3\% in TLB
miss processing, but only 4.3\% of that can be
attributed to virtualization overheads. We show that
even for the heaviest workloads, a well-tuned
application that uses large pages on a recent OS
release with a modern hypervisor running on the latest
x86 processors sees only minimal degradation from the
additional overhead of the two-dimensional page walks
in a virtualized server.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
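
The 6x figure above has a short derivation: in a two-dimensional
(nested) walk, each of the $n$ guest page-table pointers plus the
final guest-physical address must itself be translated by an
$m$-level host walk, giving $(n+1)(m+1)-1$ memory references versus
$n$ for a native walk. With 4-level tables on both sides,
$(4+1)(4+1)-1 = 24$ references against 4, i.e. the quoted 6x factor.
(This is the standard nested-paging accounting, stated here for
orientation; the formula is not taken from the paper itself.)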
@Article{Nathan:2016:SRO,
author = "Senthil Nathan and Umesh Bellur and Purushottam
Kulkarni",
title = "On Selecting the Right Optimizations for Virtual
Machine Migration",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "37--49",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892247",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "To reduce the migration time of a virtual machine and
network traffic generated during migration, existing
works have proposed a number of optimizations to
pre-copy live migration. These optimizations are delta
compression, page skip, deduplication, and data
compression. The cost-benefit analysis of these
optimizations may preclude the use of certain
optimizations in specific scenarios. However, no study
has compared the performance {\&} cost of these
optimizations, and identified the impact of application
                 behaviour on performance gain. Hence, it is not
                 clear, for a given migration scenario and
                 application, which optimization one should employ. In
                 this
paper, we present a comprehensive empirical study using
a large number of workloads to provide recommendations
on selection of optimizations for pre-copy live
migration. The empirical study reveals that page skip
is an important optimization as it reduces network
traffic by 20\% with negligible additional CPU cost.
Data compression yields impressive gains in reducing
network traffic (37\%) but at the cost of a significant
increase in CPU consumption (5$ \times $).
De-duplication needs to be applied with utmost care as
the increase in CPU utilization might outweigh the
benefits considerably. The combination of page skip and
data compression works the best across workloads and
results in a significant reduction in network traffic
(40\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Abe:2016:UVM,
author = "Yoshihisa Abe and Roxana Geambasu and Kaustubh Joshi
and Mahadev Satyanarayanan",
title = "Urgent Virtual Machine Eviction with Enlightened
Post-Copy",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "51--64",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892252",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Virtual machine (VM) migration demands distinct
properties under resource oversubscription and workload
surges. We present enlightened post-copy, a new
mechanism for VMs under contention that evicts the
target VM with fast execution transfer and short total
duration. This design contrasts with common live
migration, which uses the down time of the migrated VM
as its primary metric; it instead focuses on recovering
the aggregate performance of the VMs being affected. In
enlightened post-copy, the guest OS identifies memory
state that is expected to encompass the VM's working
set. The hypervisor accordingly transfers its state,
mitigating the performance impact on the migrated VM
resulting from post-copy transfer. We show that our
implementation, with modest instrumentation in guest
Linux, resolves VM contention up to several times
faster than live migration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Xu:2016:SHS,
author = "Xin Xu and Bhavesh Davda",
title = "{SRVM}: Hypervisor Support for Live Migration with
Passthrough {SR-IOV} Network Devices",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "65--77",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892256",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Single-Root I/O Virtualization (SR-IOV) is a
specification that allows a single PCI Express (PCIe)
device (physical function or PF) to be used as multiple
PCIe devices (virtual functions or VF). In a
virtualization system, each VF can be directly assigned
to a virtual machine (VM) in passthrough mode to
significantly improve the network performance. However,
VF passthrough mode is not compatible with live
migration, which is an essential capability that
enables many advanced virtualization features such as
high availability and resource provisioning. To solve
this problem, we design SRVM which provides hypervisor
support to ensure the VF device can be correctly used
by the migrated VM and the applications. SRVM is
                 implemented in the hypervisor without modifications to
guest operating systems or guest VM drivers. Our
experimental results show that SRVM can effectively
migrate all memory state, and there is no data loss or
corruption in applications after live migration. SRVM
does not increase VM downtime. It only costs limited
resources (an extra CPU core), and there is no
significant runtime overhead in VM network
performance. In fact, since the VF can continue to be
                 used during the pre-copy phase, it offers 9.6 times
                 higher network throughput and 98\% lower network
                 latency compared to other solutions that switch to
                 para-virtualization mode during live migration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Williams:2016:EEH,
author = "Dan Williams and Yaohui Hu and Umesh Deshpande and
Piush K. Sinha and Nilton Bila and Kartik Gopalan and
Hani Jamjoom",
title = "Enabling Efficient Hypervisor-as-a-Service Clouds with
Ephemeral Virtualization",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "79--92",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892254",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When considering a hypervisor, cloud providers must
balance conflicting requirements for simple, secure
code bases with more complex, feature-filled offerings.
This paper introduces Dichotomy, a new two-layer cloud
architecture in which the roles of the hypervisor are
split. The cloud provider runs a lean hyperplexor that
has the sole task of multiplexing hardware and running
more substantial hypervisors (called featurevisors)
that implement features. Cloud users choose
featurevisors from a selection of lightly-modified
hypervisors potentially offered by third-parties in an
``as-a-service'' model for each VM. Rather than running
the featurevisor directly on the hyperplexor using
nested virtualization, Dichotomy uses a new
virtualization technique called ephemeral
virtualization which efficiently (and repeatedly)
transfers control of a VM between the hyperplexor and
featurevisor using memory mapping techniques. Nesting
overhead is only incurred when the VM is accessed by
the featurevisor. We have implemented Dichotomy in
KVM/QEMU and demonstrate average switching times of 80
ms, two to three orders of magnitude faster than live
VM migration. We show that, for the featurevisor
applications we evaluated, VMs hosted in Dichotomy
deliver up to 12\% better performance than those hosted
on nested hypervisors, and continue to show benefit
even when the featurevisor applications run as often as
every 2.5~seconds.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Burtsev:2016:APV,
author = "Anton Burtsev and David Johnson and Mike Hibler and
Eric Eide and John Regehr",
title = "Abstractions for Practical Virtual Machine Replay",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "93--106",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892257",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Efficient deterministic replay of whole operating
systems is feasible and useful, so why isn't replay a
default part of the software stack? While implementing
deterministic replay is hard, we argue that the main
reason is the lack of general abstractions for
understanding and addressing the significant
engineering challenges involved in the development of a
replay engine for a modern VMM. We present a design
blueprint---a set of abstractions, general principles,
and low-level implementation details---for efficient
deterministic replay in a modern hypervisor. We build
and evaluate our architecture in Xen, a full-featured
hypervisor. Our architecture can be readily followed
and adopted, enabling replay as a ubiquitous part of a
modern virtualization stack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{McKinley:2016:NGV,
author = "Kathryn S. McKinley",
title = "Next Generation Virtual Memory Management",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "107--107",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892244",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The goal of virtual memory is an abstraction of
infinite and private memory for every process.
Unfortunately, the insatiable memory demands of modern
applications increasingly violate this abstraction by
exposing capacity, bandwidth, and performance
limitations of modern hardware. Furthermore, emerging
memory technologies are likely to exacerbate this
problem. For instance, non-volatile memory differs from
DRAM due to its asymmetric read/write performance and
thus will likely be an addition rather than a drop-in
replacement for DRAM. This talk will describe these
problems and recent architecture and software
                 innovations that address some of them. If adopted,
these solutions will impose substantial challenges for
operating system memory management, which has evolved
very slowly over the past 30 years. I will draw lessons
from the past 15 years of garbage collection advances
to suggest some promising directions for innovation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Qian:2016:EFS,
author = "Junjie Qian and Witawas Srisa-an and Sharad Seth and
Hong Jiang and Du Li and Pan Yi",
title = "Exploiting {FIFO} Scheduler to Improve Parallel
Garbage Collection Performance",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "109--121",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892248",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent studies have found that parallel garbage
collection performs worse with more CPUs and more
collector threads. As part of this work, we further
investigate this phenomenon and find that poor
scalability is worst in highly scalable Java
applications. Our investigation to find the causes
clearly reveals that efficient multi-threading in an
application can prolong the average object lifespan,
which results in less effective garbage collection. We
also find that prolonging lifespan is the direct result
of Linux's Completely Fair Scheduler due to its
round-robin like behavior that can increase the heap
contention between the application threads. Instead, if
we use pseudo first-in-first-out to schedule
application threads in large multicore systems, the
garbage collection scalability is significantly
improved while the time spent in garbage collection is
reduced by as much as 21\%. The average execution time
of the 24 Java applications used in our study is also
reduced by 11\%. Based on this observation, we propose
two approaches to optimally select scheduling policies
based on application scalability profile. Our first
approach uses the profile information from one
execution to tune the subsequent executions. Our second
approach dynamically collects profile information and
performs policy selection during execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
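
The policy switch evaluated above can be requested from user space on
Linux. A hedged sketch using the standard POSIX call (the priority
value is an arbitrary illustrative choice, and SCHED_FIFO normally
requires CAP_SYS_NICE):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

/* Move the calling thread from the default CFS policy (SCHED_OTHER)
 * to FIFO scheduling, as in the paper's pseudo first-in-first-out runs. */
int use_fifo_scheduling(void)
{
    struct sched_param sp = { .sched_priority = 1 };   /* illustrative */
    int rc = pthread_setschedparam(pthread_self(), SCHED_FIFO, &sp);
    if (rc != 0)
        fprintf(stderr, "SCHED_FIFO unavailable (need CAP_SYS_NICE?): %d\n",
                rc);
    return rc;
}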
@Article{Yu:2016:PAO,
author = "Yang Yu and Tianyang Lei and Weihua Zhang and Haibo
Chen and Binyu Zang",
title = "Performance Analysis and Optimization of Full Garbage
Collection in Memory-hungry Environments",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "123--130",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892251",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Garbage collection (GC), especially full GC, would
nontrivially impact overall application performance,
especially for those memory-hungry ones handling large
data sets. This paper presents an in-depth performance
analysis on the full GC performance of Parallel
Scavenge (PS), a state-of-the-art and the default
garbage collector in the HotSpot JVM, using traditional
and big-data applications running atop JVM on CPU
                 (e.g., Intel Xeon) and many-integrated cores (e.g.,
                 Intel Xeon Phi). The analysis uncovers that unnecessary
                 memory accesses and calculations during reference
                 updating in the compaction phase are the main causes of
                 lengthy full GC. To this end, this paper describes an
                 incremental query model for reference calculation,
                 which is further embodied in three schemes (namely
                 optimistic, sort-based, and region-based) for different
                 query patterns. Performance evaluation shows that the
                 incremental query model leads to an average speedup of
                 1.9X (up to 2.9X) in full GC and a 19.3\% (up to
                 57.2\%) improvement in application throughput, as well
                 as a 31.2\% reduction in pause time over the vanilla PS
                 collector on CPU; the corresponding numbers for Xeon
                 Phi are 2.1X (up to 3.4X), 11.1\% (up to 41.2\%), and
                 34.9\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
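
The expensive step identified above, recomputing each object's
post-compaction address while updating references, is conventionally
derived from per-region destination offsets plus the live bytes
preceding the object within its region. The C sketch below shows that
generic calculation only (all structures are illustrative; this is not
the paper's incremental query model):

#include <stddef.h>
#include <stdint.h>

#define REGION_SHIFT 20                 /* 1 MiB regions, illustrative */

typedef struct {
    uintptr_t heap_base;
    uintptr_t *region_dest;             /* precomputed: where each region's
                                           live data slides to */
} compact_info_t;

/* Live bytes preceding addr within its region, e.g. from a mark-bitmap
 * popcount; assumed to be provided by the collector. */
extern size_t live_bytes_before(uintptr_t addr);

/* New address of a live object after sliding compaction. */
uintptr_t forward_address(const compact_info_t *ci, uintptr_t obj)
{
    size_t region = (obj - ci->heap_base) >> REGION_SHIFT;
    return ci->region_dest[region] + live_bytes_before(obj);
}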
@Article{Smith:2016:LMR,
author = "Rebecca Smith and Scott Rixner",
title = "Leveraging Managed Runtime Systems to Build, Analyze,
and Optimize Memory Graphs",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "131--143",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892253",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Optimizing memory management is a major challenge of
embedded systems programming, as memory is scarce.
Further, embedded systems often have heterogeneous
memory architectures, complicating the task of memory
allocation during both compilation and migration.
However, new opportunities for addressing these
challenges have been created by the recent emergence of
managed runtimes for embedded systems. By imposing
structure on memory, these systems have opened the
doors for new techniques for analyzing and optimizing
memory usage within embedded systems. This paper
presents GEM (Graphs of Embedded Memory), a tool which
capitalizes on the structure that managed runtime
systems provide in order to build memory graphs which
facilitate memory analysis and optimization. At GEM's
core are a set of fundamental graph transformations
which can be layered to support a wide range of use
cases, including interactive memory visualization,
de-duplication of objects and code, compilation for
heterogeneous memory architectures, and transparent
migration. Moreover, since the same underlying
infrastructure supports all of these orthogonal
functionalities, they can easily be applied together to
complement each other.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Ben-Yehuda:2016:NPM,
author = "Muli Ben-Yehuda and Orna Agmon Ben-Yehuda and Dan
Tsafrir",
title = "The nom Profit-Maximizing Operating System",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "145--160",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892250",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the near future, cloud providers will sell their
users virtual machines with CPU, memory, network, and
storage resources whose prices constantly change
according to market-driven supply and demand
conditions. Running traditional operating systems in
these virtual machines is a poor fit: traditional
operating systems are not aware of changing resource
prices and their sole aim is to maximize performance
with no consideration of costs. Consequently, they
yield low profits. We present nom, a profit-maximizing
operating system designed for cloud computing platforms
with dynamic resource prices. Applications running on
nom aim to maximize profits by optimizing
simultaneously for performance and resource costs. The
nom kernel provides them with direct access to the
underlying hardware and full control over their private
software stacks. Since nom applications know there is
no single ``best'' software stack, they adapt their
stacks' behavior on the fly according to the current
price of available resources and their private utility
from them, which differs between applications. We show
that in addition to achieving up to 3.9x better
throughput and up to 9.1x better latency, nom
applications yield up to 11.1x higher profits when
compared with the same applications running on Linux
and OSv.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Hale:2016:EHP,
author = "Kyle C. Hale and Peter A. Dinda",
title = "Enabling Hybrid Parallel Runtimes Through Kernel and
Virtualization Support",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "161--175",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892255",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In our hybrid runtime (HRT) model, a parallel runtime
system and the application are together transformed
into a specialized OS kernel that operates entirely in
kernel mode and can thus implement exactly its desired
abstractions on top of fully privileged hardware
access. We describe the design and implementation of
two new tools that support the HRT model. The first,
the Nautilus Aerokernel, is a kernel framework
specifically designed to enable HRTs for x64 and Xeon
Phi hardware. Aerokernel primitives are specialized for
HRT creation and thus can operate much faster, up to
two orders of magnitude faster, than related primitives
in Linux. Aerokernel primitives also exhibit much lower
variance in their performance, an important
consideration for some forms of parallelism. We have
realized several prototype HRTs, including one based on
the Legion runtime, and we provide application
macrobenchmark numbers for our Legion HRT. The second
tool, the hybrid virtual machine (HVM), is an extension
to the Palacios virtual machine monitor that allows a
single virtual machine to simultaneously support a
traditional OS and software stack alongside an HRT with
specialized hardware access. The HRT can be booted in a
time comparable to a Linux user process startup, and
functions in the HRT, which operate over the user
process's memory, can be invoked by the process with
latencies not much higher than those of a function
call.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Waldspurger:2016:SSL,
author = "Carl Waldspurger and Emery Berger and Abhishek
Bhattacharjee and Kevin Pedretti and Simon Peter and
Chris Rossbach",
title = "Sweet Spots and Limits for Virtualization",
journal = j-SIGPLAN,
volume = "51",
number = "7",
pages = "177--177",
month = jul,
year = "2016",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3007611.2892249",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:12 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This year at VEE, we added a panel to discuss the
state of virtualization: what problems are solved? what
problems are important? and what problems may not be
                 worth solving? The panelists are experts in areas
ranging from hardware virtualization up to
language-level virtualization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '16 conference proceedings.",
}
@Article{Nitu:2017:SBQ,
author = "Vlad Nitu and Pierre Olivier and Alain Tchana and
Daniel Chiba and Antonio Barbalace and Daniel Hagimont
and Binoy Ravindran",
title = "Swift Birth and Quick Death: Enabling Fast Parallel
Guest Boot and Destruction in the {Xen} Hypervisor",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "1--14",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050758",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The ability to quickly set up and tear down a virtual
machine is critical for today's cloud elasticity, as
well as in numerous other scenarios: guest
migration/consolidation, event-driven invocation of
micro-services, dynamically adaptive unikernel-based
applications, micro-reboots for security or stability,
etc. In this paper, we focus on the process of setting
up/freeing the hypervisor and host control layer data
structures at boot/destruction time, showing that it
does not scale in current virtualization solutions. In
addition to the direct overhead of long VM
set-up/destruction times, we demonstrate by
experimentation the indirect costs on real world auto
scaling systems. Focusing on the popular Xen
hypervisor, we identify three critical issues hindering
the scalability of the boot and destruction processes:
serialized boot, unscalable interactions with the
Xenstore at guest creation time, and remote NUMA memory
scrubbing at destruction time. For each of these issues
we present the design and implementation of a solution
in the Xen infrastructure: parallel boot with
fine-grained locking, caching of Xenstore data, and
local NUMA scrubbing. We evaluate these solutions using
micro-benchmarks, macro-benchmarks, and real world
datacenter traces. Results show that our work improves
                 the current Xen implementation by a significant
                 factor; for example, macro-benchmarks indicate a
                 speedup of more
than 4X in high-load scenarios.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Kuenzer:2017:UEC,
author = "Simon Kuenzer and Anton Ivanov and Filipe Manco and
Jose Mendes and Yuri Volchkov and Florian Schmidt and
Kenichi Yasukata and Michio Honda and Felipe Huici",
title = "Unikernels Everywhere: The Case for Elastic {CDNs}",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "15--29",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050757",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Video streaming dominates the Internet's overall
traffic mix, with reports stating that it will
constitute 90\% of all consumer traffic by 2019. Most
of this video is delivered by Content Delivery Networks
(CDNs), and, while they optimize QoE metrics such as
buffering ratio and start-up time, no single CDN
provides optimal performance. In this paper we make the
case for elastic CDNs, the ability to build virtual
CDNs on-the-fly on top of shared, third-party
                 infrastructure at scale. To bring this idea closer to
                 reality, we begin with large-scale simulations to
                 quantify
the effects that elastic CDNs would have if deployed,
and build and evaluate MiniCache, a specialized,
minimalistic virtualized content cache that runs on the
Xen hypervisor. MiniCache is able to serve content at
rates of up to 32 Gb/s and handle up to 600K reqs/sec
on a single CPU core, as well as boot in about 90
milliseconds on x86 and around 370 milliseconds on
ARM32.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Zhang:2017:MAP,
author = "Jinshi Zhang and Eddie Dong and Jian Li and Haibing
Guan",
title = "{MigVisor}: Accurate Prediction of {VM} Live Migration
Behavior using a Working-Set Pattern Model",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "30--43",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050753",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Live migration of a virtual machine (VM) is a powerful
technique with benefits of server maintenance, resource
management, dynamic workload re-balance, etc. Modern
research has effectively reduced the VM live migration
(VMLM) time to dozens of milliseconds, but live
migration still exhibits failures if it cannot
terminate within the given time constraint. The ability
to predict this type of failure can avoid wasting
networking and computing resources on the VM migration,
and the associated system performance degradation
caused by wasting these resources. The cost of VM live
migration highly depends on the application workload of
the VM, which may undergo frequent changes. At the same
time, the available system resources for VM migration
can also change substantially and frequently. To
account for these issues, we present a solution called
MigVisor, which can accurately predict the behaviour of
                 VM migration using a working-set model. This can enable
system managers to predict the migration cost and
enhance the system management efficacy. The
experimental results prove the design suitability and
show that the MigVisor has a high prediction accuracy
since the average relative error between the predicted
                 value and the measured value is only 6.2\%--9\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
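
MigVisor's prediction target can be pictured with the textbook
pre-copy model it refines: each round retransmits what was dirtied
during the previous round, so migration converges only while the dirty
rate stays below the available bandwidth. A toy C calculator of that
model (the parameters and the 30-round cap are illustrative
assumptions, not MigVisor's predictor):

#include <stdio.h>

/* Toy pre-copy model: round i transfers the data dirtied during
 * round i-1. Converges only if dirty_rate < bandwidth (both MB/s). */
double precopy_time(double vm_mb, double dirty_rate, double bandwidth,
                    double stop_mb /* residue that triggers stop-and-copy */)
{
    double to_send = vm_mb, total = 0;
    for (int round = 0; round < 30 && to_send > stop_mb; round++) {
        double t = to_send / bandwidth;    /* time for this round */
        total += t;
        to_send = dirty_rate * t;          /* pages dirtied meanwhile */
    }
    return total + to_send / bandwidth;    /* final stop-and-copy round */
}

int main(void)
{
    printf("predicted migration time: %.1f s\n",
           precopy_time(8192, 100, 1000, 64));
    return 0;
}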
@Article{Garg:2017:CGA,
author = "Anshuj Garg and Debadatta Mishra and Purushottam
Kulkarni",
title = "{Catalyst}: {GPU}-assisted rapid memory deduplication
in virtualization environments",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "44--59",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050760",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Content based page sharing techniques improve memory
efficiency in virtualized systems by identifying and
merging identical pages. Kernel Same-page Merging
(KSM), a Linux kernel utility for page sharing,
sequentially scans memory pages of virtual machines to
deduplicate pages. Sequential scanning of pages has
several undesirable side effects---wasted CPU cycles
when no sharing opportunities exist, and rate of
discovery of sharing being dependent on the scanning
rate and corresponding CPU availability. In this work,
                 we exploit the presence of GPUs on modern systems to enable
rapid memory sharing through targeted scanning of
                 pages. Our solution, Catalyst, works in two phases:
                 in the first, pages of virtual machines are processed
                 by the GPU to identify likely candidates for sharing;
                 the second performs page-level similarity checks on
                 the targeted set of shareable pages. Opportunistic
usage of the GPU to produce sharing hints enables rapid
and low-overhead duplicate detection, and sharing of
memory pages in virtualization environments. We
evaluate Catalyst against various benchmarks and
workloads to demonstrate that Catalyst can achieve
higher memory sharing in lesser time compared to
different scan rate configurations of KSM, at lower or
comparable compute costs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
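
Catalyst's two phases, cheap hashing to nominate candidate pages (done
on the GPU in the paper) followed by an exact byte comparison, can be
sketched on the CPU alone; FNV-1a and the 4 KiB page size are
illustrative stand-ins:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Phase 1: cheap fingerprint per page (computed on the GPU in the
 * paper; FNV-1a here is just an illustrative stand-in). */
uint64_t page_hash(const uint8_t *page)
{
    uint64_t h = 1469598103934665603ull;
    for (size_t i = 0; i < PAGE_SIZE; i++)
        h = (h ^ page[i]) * 1099511628211ull;
    return h;
}

/* Phase 2: only pages whose fingerprints collide are compared
 * byte-by-byte, since equal hashes do not guarantee equal contents. */
bool pages_mergeable(const uint8_t *a, const uint8_t *b)
{
    return page_hash(a) == page_hash(b) && memcmp(a, b, PAGE_SIZE) == 0;
}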
@Article{Fumero:2017:JTG,
author = "Juan Fumero and Michel Steuwer and Lukas Stadler and
Christophe Dubach",
title = "Just-In-Time {GPU} Compilation for Interpreted
Languages with Partial Evaluation",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "60--73",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050761",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computer systems are increasingly featuring powerful
parallel devices with the advent of many-core CPUs and
GPUs. This offers the opportunity to solve
computationally-intensive problems at a fraction of the
time traditional CPUs need. However, exploiting
heterogeneous hardware requires the use of low-level
programming language approaches such as OpenCL, which
is incredibly challenging, even for advanced
programmers. On the application side, interpreted
dynamic languages are increasingly becoming popular in
many domains due to their simplicity, expressiveness
and flexibility. However, this creates a wide gap
between the high-level abstractions offered to
programmers and the low-level hardware-specific
interface. Currently, programmers must rely on high
performance libraries or they are forced to write parts
of their application in a low-level language like
OpenCL. Ideally, nonexpert programmers should be able
to exploit heterogeneous hardware directly from their
interpreted dynamic languages. In this paper, we
present a technique to transparently and automatically
offload computations from interpreted dynamic languages
to heterogeneous devices. Using just-in-time
compilation, we automatically generate OpenCL code at
runtime which is specialized to the actual observed
data types using profiling information. We demonstrate
our technique using R, which is a popular interpreted
                 dynamic language predominantly used in big data
                 analytics. Our experimental results show that
                 execution
on a GPU yields speedups of over 150x compared to the
sequential FastR implementation and the obtained
performance is competitive with manually written GPU
code. We also show that when taking into account
start-up time, large speedups are achievable, even when
the applications run for as little as a few seconds.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Kotselidis:2017:HMR,
author = "Christos Kotselidis and James Clarkson and Andrey
Rodchenko and Andy Nisbet and John Mawer and Mikel
Luj{\'a}n",
title = "Heterogeneous Managed Runtime Systems: a Computer
Vision Case Study",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "74--82",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050764",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Real-time 3D space understanding is becoming prevalent
across a wide range of applications and hardware
platforms. To meet the desired Quality of Service
(QoS), computer vision applications tend to be heavily
parallelized and exploit any available hardware
accelerators. Current approaches to achieving real-time
computer vision revolve around programming languages
typically associated with High Performance Computing
along with binding extensions for OpenCL or CUDA
execution. Such implementations, although high
performing, lack portability across the wide range of
diverse hardware resources and accelerators. In this
paper, we showcase how a complex computer vision
application can be implemented within a managed runtime
system. We discuss the complexities of achieving
high-performing and portable execution across embedded
and desktop configurations. Furthermore, we demonstrate
that it is possible to achieve the QoS target of over
30 frames per second (FPS) by exploiting FPGA and GPGPU
acceleration transparently through the managed runtime
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Deng:2017:DWT,
author = "Liang Deng and Peng Liu and Jun Xu and Ping Chen and
Qingkai Zeng",
title = "Dancing with Wolves: Towards Practical Event-driven
{VMM} Monitoring",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "83--96",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050750",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a novel framework that enables
practical event-driven monitoring for untrusted virtual
machine monitors (VMMs) in cloud computing. Unlike
previous approaches for VMM monitoring, our framework
neither relies on a higher privilege level nor requires
any special hardware support. Instead, we place the
trusted monitor at the same privilege level and in the
same address space with the untrusted VMM to achieve
superior efficiency, while proposing a unique
mutual-protection mechanism to ensure the integrity of
the monitor. Our security analysis demonstrates that
our framework can provide high assurance for
event-driven VMM monitoring, even if the
highest-privilege VMM is fully compromised. The
experimental results show that our framework only
incurs trivial performance overhead for enforcing
event-driven monitoring policies, exhibiting a tremendous
performance improvement over previous approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Suneja:2017:SIL,
author = "Sahil Suneja and Ricardo Koller and Canturk Isci and
Eyal de Lara and Ali Hashemi and Arnamoy Bhattacharyya
and Cristiana Amza",
title = "Safe Inspection of Live Virtual Machines",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "97--111",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050766",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With DevOps automation and an everything-as-code
approach to lifecycle management for cloud-native
applications, challenges emerge from an operational
visibility and control perspective. Once a VM is
deployed in production it typically becomes a hands-off
entity in terms of restrictions towards inspecting or
tuning it, for fear of negatively impacting its
operation. We present CIVIC (Cloning and Injection
based VM Inspection for Cloud), a new mechanism that
enables safe inspection of unmodified production VMs
on-the-fly. CIVIC restricts all impact and side-effects
of inspection or analysis operations inside a live
clone of the production VM. New functionality over the
replicated VM state is introduced using code injection.
In this paper, we describe the design and
implementation of our solution over KVM/QEMU. We
demonstrate four of its use cases: (i) safe reuse of
system monitoring agents, (ii) impact-heavy problem
diagnostics and troubleshooting, (iii) attaching an
intrusive anomaly detector to a live service, and (iv)
live tuning of a webserver's configuration parameters.
Our evaluation shows CIVIC is nimble and lightweight in
terms of memory footprint as well as clone activation
time (6.5s), and has a low impact on the original VM
({$<$} 10\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Arulraj:2017:IVS,
author = "Leo Arulraj and Andrea C. Arpaci-Dusseau and Remzi H.
Arpaci-Dusseau",
title = "Improving Virtualized Storage Performance with Sky",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "112--128",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050755",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce Sky, an extension to the VMM that gathers
insights and information by intercepting system calls
made by guest applications. We show how Sky gains three
specific insights --- guest file-size information,
metadata-data distinction, and file-content hints ---
and uses said information to enhance
virtualized-storage performance. By caching small files
and metadata with higher priority, Sky reduces the
runtime by 2.3 to 8.8 times for certain workloads. Sky
also achieves 4.5 to 18.7 times reduction in the
runtime of an open-source block-layer deduplication
system by exploiting hints about file contents. Sky
works underneath both Linux and FreeBSD guests, as well
as under a range of file systems, thus enabling
portable and general VMM-level optimization underneath
a wide range of storage stacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Hetzelt:2017:SAE,
author = "Felicitas Hetzelt and Robert Buhren",
title = "Security Analysis of Encrypted Virtual Machines",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "129--142",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050763",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cloud computing has become indispensable in today's
computer landscape. The flexibility it offers for
customers as well as for providers has become a crucial
factor for large parts of the computer industry.
Virtualization is the key technology that allows for
sharing of hardware resources among different
customers. The controlling software component, called
hypervisor, provides a virtualized view of the computer
resources and ensures separation of different guest
virtual machines. However, this important cornerstone
of cloud computing is not necessarily trustworthy or
bug-free. To mitigate this threat, AMD introduced Secure
Encrypted Virtualization, short SEV, which
transparently encrypts a virtual machine's memory. In
this paper we analyse to what extent the proposed
features can resist a malicious hypervisor and discuss
the tradeoffs imposed by additional protection
mechanisms. To do so, we developed a model of SEV's
security capabilities based on the available
documentation, as actual silicon implementations are not
yet on the market. We found that the first proposed
version of SEV is not up to the task owing to three
design shortcomings. First, the virtual machine control
block is not encrypted and handled directly by the
hypervisor, allowing it to bypass VM memory encryption
by executing conveniently chosen gadgets. Secondly, the
general purpose registers are not encrypted upon
vmexit, leaking potentially sensitive data. Finally,
the control over the nested pagetables allows a
malicious hypervisor to closely monitor the execution
state of a VM and attack it with memory replay
attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Wang:2017:RLW,
author = "Zhe Wang and Chenggang Wu and Jianjun Li and Yuanming
Lai and Xiangyu Zhang and Wei-Chung Hsu and Yueqiang
Cheng",
title = "{ReRanz}: a Light-Weight Virtual Machine to Mitigate
Memory Disclosure Attacks",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "143--156",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050752",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent code reuse attacks are able to circumvent
various address space layout randomization (ASLR)
techniques by exploiting memory disclosure
vulnerabilities. To mitigate sophisticated code reuse
attacks, we proposed a light-weight virtual machine,
ReRanz, which deployed a novel continuous binary code
re-randomization to mitigate memory disclosure oriented
attacks. In order to meet security and performance
goals, costly code randomization operations were
outsourced to a separate process, called the
``shuffling process''. The shuffling process
continuously flushed the old code and replaced it with
a fine-grained randomized code variant. ReRanz repeated
the process each time an adversary might obtain the
information and upload a payload. Our performance
evaluation shows that ReRanz Virtual Machine incurs a
very low performance overhead. The security evaluation
shows that ReRanz successfully protects the Nginx web
server against the Blind-ROP attack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Estrada:2017:UDP,
author = "Zachary J. Estrada and Read Sprabery and Lok Yan and
Zhongzhi Yu and Roy Campbell and Zbigniew Kalbarczyk
and Ravishankar K. Iyer",
title = "Using {OS} Design Patterns to Provide Reliability and
Security as-a-Service for {VM-based} Clouds",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "157--170",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050759",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper extends the concepts behind cloud services
to offer hypervisor-based reliability and security
monitors for cloud virtual machines. Cloud VMs can be
heterogeneous and as such guest OS parameters needed
for monitoring can vary across different VMs and must
be obtained in some way. Past work involves running
code inside the VM, which is unacceptable for a cloud
environment. We solve this problem by recognizing that
there are common OS design patterns that can be used to
infer monitoring parameters from the guest OS. We
extract information about the cloud user's guest OS
with the user's existing VM image and knowledge of OS
design patterns as the only inputs to analysis. To
demonstrate the range of monitoring functionality
possible with this technique, we implemented four
sample monitors: a guest OS process tracer, an OS hang
detector, a return-to-user attack detector, and a
process-based keylogger detector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Hussein:2017:OPR,
author = "Ahmed Hussein and Mathias Payer and Antony L. Hosking
and Chris Vick",
title = "One Process to Reap Them All: Garbage Collection
as-a-Service",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "171--186",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050754",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Ubiquitous mobile platforms such as Android rely on
managed language run-time environments, also known as
language virtual machines (VMs), to run a diverse range
of user applications (apps). Each app runs in its own
private VM instance, and each VM makes its own private
local decisions in managing its use of processor and
memory resources. Moreover, the operating system and
the hardware do not communicate their low-level
decisions regarding power management with the
high-level app environment. This lack of coordination
across layers and across apps restricts more effective
global use of resources on the device. We address this
problem by devising and implementing a global memory
manager service for Android that optimizes memory
usage, run-time performance, and power consumption
globally across all apps running on the device. The
service focuses on the impact of garbage collection
(GC) along these dimensions, since GC poses a
significant overhead within managed run-time
environments. Our prototype collects system-wide
statistics from all running VMs, makes centralized
decisions about memory management across apps and
across software layers, and also collects garbage
centrally. Furthermore, the global memory manager
coordinates with the power manager to tune collector
scheduling. In our evaluation, we illustrate the impact
of such a central memory management service in reducing
total energy consumption (up to 18\%), increasing
throughput (up to 12\%), and improving memory
utilization and adaptability to user activities.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Zhang:2017:DLN,
author = "Jie Zhang and Xiaoyi Lu and Dhabaleswar K. (DK)
Panda",
title = "Designing Locality and {NUMA} Aware {MPI} Runtime for
Nested Virtualization based {HPC} Cloud with {SR-IOV}
Enabled {InfiniBand}",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "187--200",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050765",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hypervisor-based virtualization solutions reveal good
security and isolation, while container-based solutions
make applications and workloads more portable and
distributed in an effective, standardized and
repeatable way. Therefore, nested virtualization based
computing environments (e.g., container over virtual
machine), which inherit the capabilities from both
solutions, are becoming more and more attractive in
clouds (e.g., running Docker over Amazon EC2 VMs).
Recent studies have shown that running applications in
either VMs or containers still has significant
overhead, especially for I/O intensive workloads. This
motivates us to investigate whether the nested
virtualization based solution can be adopted to build
high-performance computing (HPC) clouds for running MPI
applications efficiently and where the bottlenecks lie.
To eliminate performance bottlenecks, we propose a
high-performance two-layer locality and NUMA aware MPI
library, which is able to dynamically detect
co-resident containers inside one VM as well as detect
co-resident VMs inside one host at MPI runtime. Thus the
MPI processes across different containers and VMs can
communicate with each other by shared memory or Cross
Memory Attach (CMA) channels instead of network channels
if they are co-resident. We further propose an enhanced
NUMA aware hybrid design to utilize InfiniBand loopback
based channel to optimize large message transfer across
containers when they are running on different sockets.
Performance evaluations show that compared with the
performance of the state-of-the-art (1Layer) design, our
proposed enhanced-hybrid design can bring up to 184\%,
81\%, and 12\% benefit on point-to-point, collective
operations, and end applications. Compared with the
default performance, our enhanced-hybrid design
delivers up to 184\%, 85\% and 16\% performance
improvement.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Lu:2017:FPL,
author = "Kai Lu and Wenzhe Zhang and Xiaoping Wang and Mikel
Luj{\'a}n and Andy Nisbet",
title = "Flexible Page-level Memory Access Monitoring Based on
Virtualization Hardware",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "201--213",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050751",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Page protection is often used to achieve memory access
monitoring in many applications, such as
program analysis, checkpoint-based failure recovery,
and garbage collection in managed runtime systems.
Typically, low overhead access monitoring is limited by
the relatively large page-level granularity of memory
management unit hardware support for virtual memory
protection. In this paper, we improve upon traditional
page-level mechanisms by additionally using hardware
support for virtualization in order to achieve fine and
flexible granularities that can be smaller than a page.
We first introduce a memory allocator based on page
protection that can achieve fine-grained monitoring.
Second, we explain how virtualization hardware support
can be used to achieve dynamic adjustment of the
monitoring granularity. In all, we propose a
process-level virtual machine to achieve dynamic and
fine-grained monitoring. Any application can run on our
process-level virtual machine without modification.
Experimental results for an incremental checkpoint tool
provide a use-case to demonstrate our work. Compared
with traditional page-based checkpointing, our work can
effectively reduce the amount of checkpoint data and
improve performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Yang:2017:CLA,
author = "Chun Yang and Xianhua Liu and Xu Cheng",
title = "Content Look-Aside Buffer for Redundancy-Free Virtual
Disk {I/O} and Caching",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "214--227",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050762",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Storage consolidation in a virtualized environment
introduces numerous duplications in virtual disks and
imposes considerable pressure on disk I/O and caching.
In this paper, we present a content look-aside buffer
(CLB) approach for simultaneously providing
redundancy-free virtual disk I/O and caching. CLB
attaches persistent fingerprints to virtual disk
blocks, which enables detection of I/O redundancy
before disk access. At run time, CLB exploits content
pages already present in the guest disk caches to
service the redundant reads through page sharing, thus
eliminating both redundant I/O requests and redundant
disk cache copies. For write requests, CLB uses a group
invalidating writeback protocol for updating
fingerprints to support crash consistency while
minimizing disk write overhead. By implementing and
evaluating a CLB prototype on KVM hypervisor, we
demonstrate that CLB delivers considerably improved I/O
performance with realistic workloads. Our CLB prototype
improves the throughput of sequential and random read
on duplicate data by 4.1x and 26.2x, respectively. For
typical read-intensive workloads, such as booting a VM
and launching an application, CLB's I/O deduplication and
cache deduplication eliminates 94.9\%--98.5\% of read
requests and saves 50\%--100\% cache memory in each VM,
respectively. Compared with QEMU's raw virtual disk
format, CLB improves the per-disk VM density by
8x--16x. For mixed read-write workloads, the cost of
on-line fingerprint updating offsets the read benefit;
nevertheless, CLB substantially improves overall
performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{dAntras:2017:HXU,
author = "Amanieu d'Antras and Cosmin Gorgovan and Jim Garside
and John Goodacre and Mikel Luj{\'a}n",
title = "{HyperMAMBO-X64}: Using Virtualization to Support
High-Performance Transparent Binary Translation",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "228--241",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050756",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Current computer architectures --- ARM, MIPS, PowerPC,
SPARC, x86 --- have evolved from a 32-bit architecture
to a 64-bit one. Computer architects often consider
whether it could be possible to eliminate hardware
support for a subset of the instruction set so as to
reduce hardware complexity, which could improve
performance, reduce power usage and accelerate
processor development. This paper considers the
scenario where we want to eliminate 32-bit hardware
support from the ARMv8 architecture. Dynamic binary
translation can be used for this purpose and generally
comes in one of two forms: application-level
translators that translate a single user mode process
on top of a native operating system, and system-level
translators that translate an entire operating system
and all its processes. Application-level translators
can have good performance but are not totally
transparent; system-level translators may be 100\%
compatible but performance suffers. HyperMAMBO-X64 uses
a new approach that gets the best of both worlds, being
able to run the translator as an application under the
hypervisor but still react to the behavior of guest
operating systems. It works with complete transparency
with regards to the virtualized system whilst
delivering performance close to that provided by
hardware execution. A key factor in the low overhead of
HyperMAMBO-X64 is its deep integration with the
virtualization and memory management features of ARMv8.
These are exploited to support caching of translations
across multiple address spaces while ensuring that
translated code remains consistent with the source
instructions it is based on. We show how these
attributes are achieved without sacrificing either
performance or accuracy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Zhu:2017:VLV,
author = "Min Zhu and Bibo Tu and Wei Wei and Dan Meng",
title = "{HA-VMSI}: a Lightweight Virtual Machine Isolation
Approach with Commodity Hardware for {ARM}",
journal = j-SIGPLAN,
volume = "52",
number = "7",
pages = "242--256",
month = jul,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3140607.3050767",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat Sep 16 10:18:17 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Once compromising the hypervisor, remote or local
adversaries can easily access other customers'
sensitive data in the memory and context of guest
virtual machines (VMs). VM isolation is an efficient
mechanism for protecting the memory of guest VMs from
unauthorized access. However, previous VM isolation
systems either modify hardware architecture or
introduce a software module without being protected,
and most of them focus on the x86 architecture. This
paper proposes HA-VMSI, a lightweight hardware-assisted
VM isolation approach for ARM, to provide runtime
protection of guest VMs, even with a compromised
hypervisor. In the ARM TrustZone secure world, a thin
security monitor is introduced as HA-VMSI's entire TCB.
Hence, the security monitor is much less vulnerable and
safe from attacks that can compromise the hypervisor.
The key idea of HA-VMSI is decoupling the functions of
memory isolation among VMs from the hypervisor into the
security monitor. As a result, the hypervisor can only
update the Stage-2 page tables of VMs via the security
monitor, which inspects and approves each new mapping.
It is worth noting that HA-VMSI is more secure and
effective than current software approaches, and more
flexible and compatible than hardware approaches. We
have implemented a prototype for the KVM hypervisor with
multiple Linux guest OSes on a Juno board. The
security assessment and performance evaluation show
that HA-VMSI is effective, efficient and practical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '17 conference proceedings.",
}
@Article{Steele:2017:TNO,
author = "Guy L. {Steele, Jr.}",
title = "It's Time for a New Old Language",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "1--1",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018773",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The most popular programming language in computer
science has no compiler or interpreter. Its definition
is not written down in any one place. It has changed a
lot over the decades, and those changes have introduced
ambiguities and inconsistencies. Today, dozens of
variations are in use, and its complexity has reached
the point where it needs to be re-explained, at least
in part, every time it is used. Much effort has been
spent in hand-translating between this language and
other languages that do have compilers. The language is
quite amenable to parallel computation, but this fact
has gone unexploited. In this talk we will summarize
the history of the language, highlight the variations
and some of the problems that have arisen, and propose
specific solutions. We suggest that it is high time
that this language be given a complete formal
specification, and that compilers, IDEs, and
proof-checkers be created to support it, so that all
the best tools and techniques of our trade may be
applied to it also.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Chen:2017:ESF,
author = "Guoyang Chen and Yue Zhao and Xipeng Shen and Huiyang
Zhou",
title = "{EffiSha}: a Software Framework for Enabling Efficient
Preemptive Scheduling of {GPU}",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "3--16",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018748",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern GPUs are broadly adopted in many multitasking
environments, including data centers and smartphones.
However, the current support for the scheduling of
multiple GPU kernels (from different applications) is
limited, forming a major barrier for GPUs to meet many
practical needs. This work for the first time
demonstrates that on existing GPUs, efficient
preemptive scheduling of GPU kernels is possible even
without special hardware support. Specifically, it
presents EffiSha, a pure software framework that
enables preemptive scheduling of GPU kernels with very
low overhead. The enabled preemptive scheduler offers
flexible support of kernels of different priorities,
and demonstrates significant potential for reducing the
average turnaround time and improving the overall
system throughput of programs that time-share a modern
GPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Cohen:2017:LLS,
author = "Nachshon Cohen and Arie Tal and Erez Petrank",
title = "Layout Lock: a Scalable Locking Paradigm for
Concurrent Data Layout Modifications",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "17--29",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018753",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data-structures can benefit from dynamic data layout
modifications when the size or the shape of the data
structure changes during the execution, or when
different phases in the program execute different
workloads. However, in a modern multi-core environment,
layout modifications involve costly synchronization
overhead. In this paper we propose a novel layout lock
that incurs a negligible overhead for reads and a small
overhead for updates of the data structure. We then
demonstrate the benefits of layout changes and also the
advantages of the layout lock as its supporting
synchronization mechanism for two data structures. In
particular, we propose a concurrent binary search tree,
and a concurrent array set, that benefit from
concurrent layout modifications using the proposed
layout lock. Experience demonstrates performance
advantages and integration simplicity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Zhang:2017:UGM,
author = "Xiuxia Zhang and Guangming Tan and Shuangbai Xue and
Jiajia Li and Keren Zhou and Mingyu Chen",
title = "Understanding the {GPU} Microarchitecture to Achieve
Bare-Metal Performance Tuning",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "31--43",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018755",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we present a methodology to understand
GPU microarchitectural features and improve performance
for compute-intensive kernels. The methodology relies
on a reverse engineering approach to crack the GPU ISA
encodings in order to build a GPU assembler. An
assembly microbenchmark suite correlates
microarchitectural features with their performance
factors to uncover instruction-level and memory
hierarchy preferences. We use SGEMM as a running
example to show the ways to achieve bare-metal
performance tuning. The performance boost is achieved
by tuning FFMA throughput by activating dual-issue,
eliminating register bank conflicts, adding non-FFMA
instructions with little penalty, and choosing proper
width of global/shared load instructions. On NVIDIA
Kepler K20m, we develop a faster SGEMM with 3.1Tflop/s
performance and 88\% efficiency; the performance is
15\% higher than cuBLAS7.0. Applying these
optimizations to convolution, the implementation gains
39\%--62\% performance improvement compared with
cuDNN4.0. The toolchain is an attempt to automatically
crack different GPU ISA encodings and build an
assembler adaptively for the purpose of performance
enhancements to applications on GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Ou:2017:CCD,
author = "Peizhao Ou and Brian Demsky",
title = "Checking Concurrent Data Structures Under the {C\slash
C++11} Memory Model",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "45--59",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018749",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent data structures often provide better
performance on multi-core processors but are
significantly more difficult to design and test than
their sequential counterparts. The C/C++11 standard
introduced a weak memory model with support for
low-level atomic operations such as compare and swap
(CAS). While low-level atomic operations can
significantly improve the performance of concurrent
data structures, they introduce non-intuitive behaviors
that can increase the difficulty of developing code. In
this paper, we develop a correctness model for
concurrent data structures that make use of atomic
operations. Based on this correctness model, we present
CDSSPEC, a specification checker for concurrent data
structures under the C/C++11 memory model. We have
evaluated CDSSPEC on 10 concurrent data structures,
among which CDSSPEC detected 3 known bugs and 93\% of
the injected bugs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Chabbi:2017:EAL,
author = "Milind Chabbi and Abdelhalim Amer and Shasha Wen and
Xu Liu",
title = "An Efficient Abortable-locking Protocol for
Multi-level {NUMA} Systems",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "61--74",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018768",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The popularity of Non-Uniform Memory Access (NUMA)
architectures has led to numerous locality-preserving
hierarchical lock designs, such as HCLH, HMCS, and
cohort locks. Locality-preserving locks trade fairness
for higher throughput. Hence, some instances of
acquisitions can incur long latencies, which may be
intolerable for certain applications. Few locks allow a
waiting thread to abandon its protocol on a timeout.
State-of-the-art abortable locks are not fully locality
aware, introduce high overheads, and are unsuitable for
frequent aborts. Enhancing locality-aware locks with
lightweight timeout capability is critical for their
adoption. In this paper, we design and evaluate the
HMCS-T lock, a Hierarchical MCS (HMCS) lock variant
that admits a timeout. HMCS-T maintains the locality
benefits of HMCS while ensuring aborts to be
lightweight. HMCS-T offers the progress guarantee
missing in most abortable queuing locks. Our
evaluations show that HMCS-T offers the timeout feature
at a moderate overhead over its HMCS analog. HMCS-T,
used in an MPI runtime lock, mitigated the poor
scalability of an MPI+OpenMP BFS code and resulted in
4.3x superior scaling.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Acar:2017:CSC,
author = "Umut A. Acar and Naama Ben-David and Mike Rainey",
title = "Contention in Structured Concurrency: Provably
Efficient Dynamic Non-Zero Indicators for Nested
Parallelism",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "75--88",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018762",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the past two decades, many concurrent data
structures have been designed and implemented. Nearly
all such work analyzes concurrent data structures
empirically, omitting asymptotic bounds on their
efficiency, partly because of the complexity of the
analysis needed, and partly because of the difficulty
of obtaining relevant asymptotic bounds: when the
analysis takes into account important practical
factors, such as contention, it is difficult or even
impossible to prove desirable bounds. In this paper, we
show that considering structured concurrency or relaxed
concurrency models can enable establishing strong
bounds, also for contention. To this end, we first
present a dynamic relaxed counter data structure that
indicates the non-zero status of the counter. Our data
structure extends a recently proposed data structure,
called SNZI, allowing our structure to grow dynamically
in response to the increasing degree of concurrency in
the system. Using the dynamic SNZI data structure, we
then present a concurrent data structure for
series-parallel directed acyclic graphs (sp-dags), a
key data structure widely used in the implementation of
modern parallel programming languages. The key
component of sp-dags is an in-counter data structure
that is an instance of our dynamic SNZI. We analyze the
efficiency of our concurrent sp-dags and in-counter
data structures under nested-parallel computing
paradigm. This paradigm offers a structured model for
concurrency. Under this model, we prove that our data
structures require amortized $ O(1) $ shared memory steps,
including contention. We present an implementation and
an experimental evaluation that suggests that the
sp-dags data structure is practical and can perform
well in practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Sato:2017:NIT,
author = "Kento Sato and Dong H. Ahn and Ignacio Laguna and
Gregory L. Lee and Martin Schulz and Christopher M.
Chambreau",
title = "Noise Injection Techniques to Expose Subtle and
Unintended Message Races",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "89--101",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018767",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Debugging intermittently occurring bugs within MPI
applications is challenging, and message races, a
condition in which two or more sends race to match with
a receive, are one of the common root causes. Many
debugging tools have been proposed to help programmers
resolve them, but their runtime interference perturbs
the timing such that subtle races often cannot be
reproduced with debugging tools. We present novel noise
injection techniques to expose message races even under
a tool's control. We first formalize this race problem
in the context of non-deterministic parallel
applications and use this analysis to determine an
effective noise-injection strategy to uncover them. We
codified these techniques in NINJA (Noise INJection
Agent) that exposes these races without modification to
the application. Our evaluations on synthetic cases as
well as a real-world bug in Hypre-2.10.1 show that
NINJA significantly helps expose races.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Luo:2017:TDS,
author = "Hao Luo and Pengcheng Li and Chen Ding",
title = "Thread Data Sharing in Cache: Theory and Measurement",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "103--115",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018759",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "On modern multi-core processors, independent workloads
often interfere with each other by competing for shared
cache space. However, for multi-threaded workloads,
where a single copy of data can be accessed by multiple
threads, the threads can cooperatively share cache.
Because data sharing consolidates the collective
working set of threads, the effective size of shared
cache becomes larger than it would have been when data
are not shared. This paper presents a new theory of
data sharing. It includes (1) a new metric called the
shared footprint to mathematically compute the amount
of data shared by any group of threads in any size
cache, and (2) a linear-time algorithm to measure
shared footprint by scanning the memory trace of a
multi-threaded program. The paper presents the
practical implementation and evaluates the new theory
using 14 PARSEC and SPEC OMP benchmarks, including an
example use of shared footprint in program
optimization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Ren:2017:EVM,
author = "Bin Ren and Sriram Krishnamoorthy and Kunal Agrawal
and Milind Kulkarni",
title = "Exploiting Vector and Multicore Parallelism for
Recursive, Data- and Task-Parallel Programs",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "117--130",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018763",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern hardware contains parallel execution resources
that are well-suited for data parallelism (vector
units) and task parallelism (multicores). However, most
work on parallel scheduling focuses on one type of
hardware or the other. In this work, we present a
scheduling framework that allows for a unified
treatment of task- and data-parallelism. Our key
insight is an abstraction, task blocks, that uniformly
handles data-parallel iterations and task-parallel
tasks, allowing them to be scheduled on vector units or
executed independently on multicores. Our framework
allows us to define schedulers that can dynamically
select between executing task-blocks on vector units or
multicores. We show that these schedulers are
asymptotically optimal, and deliver the maximum amount
of parallelism available in computation trees. To
evaluate our schedulers, we develop program
transformations that can convert mixed data- and
task-parallel programs into task block-based programs.
Using a prototype instantiation of our scheduling
framework, we show that, on an 8-core system, we can
simultaneously exploit vector and multicore parallelism
to achieve $ 14 \times $--$ 108 \times $ speedup over
sequential baselines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Shudler:2017:IPC,
author = "Sergei Shudler and Alexandru Calotoiu and Torsten
Hoefler and Felix Wolf",
title = "Isoefficiency in Practice: Configuring and
Understanding the Performance of Task-based
Applications",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "131--143",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018770",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Task-based programming offers an elegant way to
express units of computation and the dependencies among
them, making it easier to distribute the computational
load evenly across multiple cores. However, this
separation of problem decomposition and parallelism
requires a sufficiently large input problem to achieve
satisfactory efficiency on a given number of cores.
Unfortunately, finding a good match between input size
and core count usually requires significant
experimentation, which is expensive and sometimes even
impractical. In this paper, we propose an automated
empirical method for finding the isoefficiency function
of a task-based program, binding efficiency, core
count, and the input size in one analytical expression.
This allows the latter two to be adjusted according to
given (realistic) efficiency objectives. Moreover, we
not only find (i) the actual isoefficiency function but
also (ii) the function that would result if the program
execution was free of resource contention and (iii) an
upper bound that could only be reached if the program
was able to maintain its average parallelism throughout
its execution. The difference between the three helps
to explain low efficiency, and in particular, it helps
to differentiate between resource contention and
structural conflicts related to task dependencies or
scheduling. The insights gained can be used to
co-design programs and shared system resources.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Utterback:2017:POR,
author = "Robert Utterback and Kunal Agrawal and I-Ting Angelina
Lee and Milind Kulkarni",
title = "Processor-Oblivious Record and Replay",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "145--161",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018764",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Record-and-replay systems are useful tools for
debugging non-deterministic parallel programs by first
recording an execution and then replaying that
execution to produce the same access pattern. Existing
record-and-replay systems generally target thread-based
execution models, and record the behaviors and
interleavings of individual threads. Dynamic
multithreaded languages and libraries, such as the Cilk
family, OpenMP, TBB, etc., do not have a notion of
threads. Instead, these languages provide a
processor-oblivious model of programming, where
programs expose task-parallelism using high-level
constructs such as spawn/sync without regard to the
number of threads/cores available to run the program.
Thread-based record-and-replay would violate the
processor-oblivious nature of these programs, as they
incorporate the number of threads into the recorded
information, constraining the replayed execution to the
same number of threads. In this paper, we present a
processor-oblivious record-and-replay scheme for such
languages where record and replay can use different
numbers of processors, and both are scheduled using work
stealing. We provide theoretical guarantees for our
record and replay scheme --- namely that record is
optimal for programs with one lock and replay is
near-optimal for all cases. In addition, we implemented
this scheme in the Cilk Plus runtime system and our
evaluation indicates that processor-obliviousness does
not cause substantial overheads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Prajapati:2017:SAA,
author = "Nirmal Prajapati and Waruna Ranasinghe and Sanjay
Rajopadhye and Rumen Andonov and Hristo Djidjev and
Tobias Grosser",
title = "Simple, Accurate, Analytical Time Modeling and Optimal
Tile Size Selection for {GPGPU} Stencils",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "163--177",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018744",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Stencil computations are an important class of compute
and data intensive programs that occur widely in
scientific and engineering applications. A number of
tools use sophisticated tiling, parallelization, and
memory mapping strategies, and generate code that
relies on vendor-supplied compilers. This code has a
number of parameters, such as tile sizes, that are then
tuned via empirical exploration. We develop a model
that guides such a choice. Our model is a simple set of
analytical functions that predict the execution time of
the generated code. It is deliberately optimistic,
since we are targeting modeling and parameter
selections yielding highly tuned
codes. We experimentally validate the model on a number
of 2D and 3D stencil codes, and show that the root mean
square error in the execution time is less than 10\%
for the subset of the codes that achieve performance
within 20\% of the best. Furthermore, based on using
our model, we are able to predict tile sizes that
achieve a further improvement of 9\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Jiang:2017:CSM,
author = "Peng Jiang and Gagan Agrawal",
title = "Combining {SIMD} and Many\slash Multi-core Parallelism
for Finite State Machines with Enumerative
Speculation",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "179--191",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018760",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "Finite State Machine (FSM) is the key kernel behind
many popular applications, including regular expression
matching, text tokenization, and Huffman decoding.
Parallelizing FSMs is extremely difficult because of
the strong dependencies and unpredictable memory
accesses. Previous efforts have largely focused on
multi-core parallelization, and used different
approaches, including {\em speculative\/} and {\em
enumerative\/} execution, both of which have been
effective but also have limitations. With increasing
width and improving flexibility in SIMD instruction
sets, this paper focuses on combining SIMD and
multi/many-core parallelism for FSMs. We have developed
a novel strategy, called {\em enumerative speculation}.
Instead of speculating on a single state as in
speculative execution or enumerating all possible
states as in enumerative execution, our strategy
speculates transitions from several possible states,
reducing the prediction overheads of the speculation
approach and the large amount of redundant work in the
enumerative approach. A simple lookback approach
produces a set of guessed states to achieve high
speculation success rates in our enumerative
speculation. We evaluate our method with four popular
FSM applications: Huffman decoding, regular expression
matching, HTML tokenization, and Div7. We obtain up to
2.5x speedup using SIMD on one core and up to 95x
combining SIMD with 60 cores of an Intel Xeon Phi. On a
single core, we outperform the best single-state
speculative execution version by an average of 1.6x,
and in combining SIMD and many-core parallelism,
outperform enumerative execution by an average of 2x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Awan:2017:CCD,
author = "Ammar Ahmad Awan and Khaled Hamidouche and Jahanzeb
Maqbool Hashmi and Dhabaleswar K. Panda",
title = "{S-Caffe}: Co-designing {MPI} Runtimes and {Caffe} for
Scalable Deep Learning on Modern {GPU} Clusters",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "193--205",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018769",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Availability of large data sets like ImageNet and
massively parallel computation support in modern HPC
devices like NVIDIA GPUs have fueled a renewed interest
in Deep Learning (DL) algorithms. This has triggered
the development of DL frameworks like Caffe, Torch,
TensorFlow, and CNTK. However, most DL frameworks have
been limited to a single node. In order to scale out DL
frameworks and bring HPC capabilities to the DL arena,
we propose S-Caffe, a scalable and distributed Caffe
adaptation for modern multi-GPU clusters. With an
in-depth analysis of new requirements brought forward
by the DL frameworks and limitations of current
communication runtimes, we present a co-design of the
Caffe framework and the MVAPICH2-GDR MPI runtime. Using
the co-design methodology, we modify Caffe's workflow
to maximize the overlap of computation and
communication with multi-stage data propagation and
gradient aggregation schemes. We bring DL-Awareness to
the MPI runtime by proposing a hierarchical reduction
design that benefits from CUDA-Aware features and
provides up to a massive 133x speedup over OpenMPI and
2.6x speedup over MVAPICH2 for 160 GPUs. S-Caffe
successfully scales up to 160 K-80 GPUs for GoogLeNet
(ImageNet) with a speedup of 2.5x over 32 GPUs. To the
best of our knowledge, this is the first framework that
scales up to 160 GPUs. Furthermore, even for single
node training, S-Caffe shows an improvement of 14\% and
9\% over Nvidia's optimized Caffe for 8 and 16 GPUs,
respectively. In addition, S-Caffe achieves up to 1395
samples per second for the AlexNet model, which is
comparable to the performance of Microsoft CNTK.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Sabne:2017:MBI,
author = "Amit Sabne and Xiao Wang and Sherman J. Kisner and
Charles A. Bouman and Anand Raghunathan and Samuel P.
Midkiff",
title = "Model-based Iterative {CT} Image Reconstruction on
{GPUs}",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "207--220",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018765",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Computed Tomography (CT) Image Reconstruction is an
important technique used in a variety of domains,
including medical imaging, electron microscopy,
non-destructive testing and transportation security.
Model-based Iterative Reconstruction (MBIR) using
Iterative Coordinate Descent (ICD) is a CT algorithm
that produces state-of-the-art results in terms of
image quality. However, MBIR is highly computationally
intensive and challenging to parallelize, and has
traditionally been viewed as impractical in
applications where reconstruction time is critical. We
present the first GPU-based algorithm for ICD-based
MBIR. The algorithm leverages the recently-proposed
concept of SuperVoxels, and efficiently exploits the
three levels of parallelism available in MBIR to better
utilize the GPU hardware resources. We also explore
data layout transformations to obtain more coalesced
accesses and several GPU-specific optimizations for
MBIR that boost performance. Across a suite of 3200
test cases, our GPU implementation obtains a geometric
mean speedup of 4.43X over a state-of-the-art
multi-core implementation on a 16-core iso-power CPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Yeh:2017:PFG,
author = "Tsung Tai Yeh and Amit Sabne and Putt Sakdhnagool and
Rudolf Eigenmann and Timothy G. Rogers",
title = "{Pagoda}: Fine-Grained {GPU} Resource Virtualization
for Narrow Tasks",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "221--234",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018754",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Massively multithreaded GPUs achieve high throughput
by running thousands of threads in parallel. To fully
utilize the hardware, workloads spawn work to the GPU
in bulk by launching large tasks, where each task is a
kernel that contains thousands of threads that occupy
the entire GPU. GPUs face severe underutilization and
their performance benefits vanish if the tasks are
narrow, i.e., they contain {$<$} 500 threads.
Latency-sensitive applications in network, signal, and
image processing that generate a large number of tasks
with relatively small inputs are examples of such
limited parallelism. This paper presents Pagoda, a
runtime system that virtualizes GPU resources, using an
OS-like daemon kernel called MasterKernel. Tasks are
spawned from the CPU onto Pagoda as they become
available, and are scheduled by the MasterKernel at the
warp granularity. Experimental results demonstrate that
Pagoda achieves a geometric mean speedup of 5.70x over
PThreads running on a 20-core CPU, 1.51x over
CUDA-HyperQ, and 1.69x over GeMTC, the
state-of-the-art runtime GPU task scheduling system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
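For readers unfamiliar with the daemon-kernel idea in the Pagoda abstract above, the following is a minimal CPU-side sketch of the same scheduling pattern: persistent workers pull narrow tasks from a shared queue instead of paying a per-task launch cost. All names here are illustrative; Pagoda itself realizes this as a GPU-resident MasterKernel that schedules at warp granularity.

// Hedged CPU-side analogy of a daemon scheduler for narrow tasks.
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

class Daemon {
  std::queue<std::function<void()>> tasks_;
  std::mutex m_;
  std::condition_variable cv_;
  bool done_ = false;
  std::vector<std::thread> workers_;
public:
  explicit Daemon(unsigned n) {
    for (unsigned i = 0; i < n; ++i)
      workers_.emplace_back([this] {
        for (;;) {
          std::function<void()> task;
          {
            std::unique_lock<std::mutex> lk(m_);
            cv_.wait(lk, [this] { return done_ || !tasks_.empty(); });
            if (done_ && tasks_.empty()) return;
            task = std::move(tasks_.front());
            tasks_.pop();
          }
          task();  // run one narrow task; the worker stays resident
        }
      });
  }
  void spawn(std::function<void()> t) {  // analogous to spawning a task onto Pagoda
    { std::lock_guard<std::mutex> lk(m_); tasks_.push(std::move(t)); }
    cv_.notify_one();
  }
  ~Daemon() {
    { std::lock_guard<std::mutex> lk(m_); done_ = true; }
    cv_.notify_all();
    for (auto &w : workers_) w.join();
  }
};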
@Article{Ben-Nun:2017:GAM,
author = "Tal Ben-Nun and Michael Sutton and Sreepathi Pai and
Keshav Pingali",
title = "{Groute}: an Asynchronous Multi-{GPU} Programming
Model for Irregular Computations",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "235--248",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018756",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Nodes with multiple GPUs are becoming the platform of
choice for high-performance computing. However, most
applications are written using bulk-synchronous
programming models, which may not be optimal for
irregular algorithms that benefit from low-latency,
asynchronous communication. This paper proposes
constructs for asynchronous multi-GPU programming, and
describes their implementation in a thin runtime
environment called Groute. Groute also implements
common collective operations and distributed
work-lists, enabling the development of irregular
applications without substantial programming effort. We
demonstrate that this approach achieves
state-of-the-art performance and exhibits strong
scaling for a suite of irregular applications on 8-GPU
and heterogeneous systems, yielding over 7x speedup for
some algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Schardl:2017:TEF,
author = "Tao B. Schardl and William S. Moses and Charles E.
Leiserson",
title = "{Tapir}: Embedding Fork-Join Parallelism into {LLVM}'s
Intermediate Representation",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "249--265",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018758",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper explores how fork-join parallelism, as
supported by concurrency platforms such as Cilk and
OpenMP, can be embedded into a compiler's intermediate
representation (IR). Mainstream compilers typically
treat parallel linguistic constructs as syntactic sugar
for function calls into a parallel runtime. These calls
prevent the compiler from performing optimizations
across parallel control constructs. Remedying this
situation is generally thought to require an extensive
reworking of compiler analyses and code transformations
to handle parallel semantics. Tapir is a compiler IR
that represents logically parallel tasks asymmetrically
in the program's control flow graph. Tapir allows the
compiler to optimize across parallel control constructs
with only minor changes to its existing analyses and
code transformations. To prototype Tapir in the LLVM
compiler, for example, we added or modified about 6000
lines of LLVM's 4-million-line codebase. Tapir enables
LLVM's existing compiler optimizations for serial code
--- including loop-invariant-code motion,
common-subexpression elimination, and tail-recursion
elimination --- to work with parallel control
constructs such as spawning and parallel loops. Tapir
also supports parallel optimizations such as loop
scheduling.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
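The fork-join parallelism that Tapir embeds into the IR has the following shape at the source level. A minimal C++ sketch, with std::async standing in for a Cilk-style spawn; it shows only the logical detach/continue/sync structure the abstract refers to, not Tapir's actual lowering or runtime.

#include <future>

// Logical fork-join: "spawn" fib(n-1), run fib(n-2) as the continuation,
// then "sync" before combining the results. Tapir represents exactly this
// asymmetric structure in the control flow graph. (Spawning a real thread
// per call, as std::async does here, is illustrative only; real runtimes
// use work stealing.)
long fib(long n) {
  if (n < 2) return n;
  auto x = std::async(std::launch::async, fib, n - 1);  // spawn
  long y = fib(n - 2);                                  // continuation
  return x.get() + y;                                   // sync
}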
@Article{Matveev:2017:MPC,
author = "Alexander Matveev and Yaron Meirovitch and Hayk
Saribekyan and Wiktor Jakubiuk and Tim Kaler and
Gergely Odor and David Budden and Aleksandar Zlateski
and Nir Shavit",
title = "A Multicore Path to Connectomics-on-Demand",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "267--281",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018766",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The current design trend in large scale machine
learning is to use distributed clusters of CPUs and
GPUs with MapReduce-style programming. Some have been
led to believe that this type of horizontal scaling can
reduce or even eliminate the need for traditional
algorithm development, careful parallelization, and
performance engineering. This paper is a case study
showing the contrary: that the benefits of algorithms,
parallelization, and performance engineering, can
sometimes be so vast that it is possible to solve
``cluster-scale'' problems on a single commodity
multicore machine. Connectomics is an emerging area of
neurobiology that uses cutting edge machine learning
and image processing to extract brain connectivity
graphs from electron microscopy images. It has long
been assumed that the processing of connectomics data
will require mass storage, farms of CPU/GPUs, and will
take months (if not years) of processing time. We
present a high-throughput connectomics-on-demand system
that runs on a multicore machine with fewer than 100
cores and extracts connectomes at the terabyte per hour
pace of modern electron microscopes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Vollmer:2017:SHS,
author = "Michael Vollmer and Ryan G. Scott and Madanlal
Musuvathi and Ryan R. Newton",
title = "{SC-Haskell}: Sequential Consistency in Languages That
Minimize Mutable Shared Heap",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "283--298",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018746",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A core, but often neglected, aspect of a programming
language design is its memory (consistency) model.
Sequential consistency~(SC) is the most intuitive
memory model for programmers as it guarantees
sequential composition of instructions and provides a
simple abstraction of shared memory as a single global
store with atomic reads and writes. Unfortunately, SC is
widely considered to be impractical due to its
associated performance overheads. Perhaps contrary to
popular opinion, this paper demonstrates that SC is
achievable with acceptable performance overheads for
mainstream languages that minimize mutable shared heap.
In particular, we modify the Glasgow Haskell Compiler
to insert fences on all writes to shared mutable memory
accessed in nonfunctional parts of the program. For a
benchmark suite containing 1,279 programs, SC adds a
geomean overhead of less than 0.4\% on an x86 machine.
The efficiency of SC arises primarily due to the
isolation provided by the Haskell type system between
purely functional and thread-local imperative
computations on the one hand, and imperative
computations on the global heap on the other. We show
how to use new programming idioms to further reduce the
SC overhead; these create a virtuous cycle of less
overhead and even stronger semantic guarantees (static
data-race freedom).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
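The fence-on-shared-write idea in the SC-Haskell abstract can be illustrated with C++ atomics (an analogy only; the paper's change is inside the Glasgow Haskell Compiler): a sequentially consistent store is the fenced form, a relaxed store the unfenced one.

#include <atomic>

std::atomic<int> shared{0};

// Sequentially consistent store: compiled with a fence (e.g. an XCHG or
// MFENCE on x86). SC-Haskell's GHC modification amounts to using this
// form on all writes to shared mutable memory in nonfunctional code.
void publish_sc(int v)      { shared.store(v, std::memory_order_seq_cst); }

// Relaxed store: a plain store with no fence, the cheaper default that
// weak memory models permit.
void publish_relaxed(int v) { shared.store(v, std::memory_order_relaxed); }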
@Article{Battig:2017:SDC,
author = "Martin B{\"a}ttig and Thomas R. Gross",
title = "Synchronized-by-Default Concurrency for Shared-Memory
Systems",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "299--312",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018747",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We explore a programming approach for concurrency that
synchronizes all accesses to shared memory by default.
Synchronization takes place by ensuring that all
program code runs inside atomic sections even if the
program code has external side effects. Threads are
mapped to atomic sections that a programmer must
explicitly split to increase concurrency. A naive
implementation of this approach incurs a large amount
of overhead. We show how to reduce this overhead to
make the approach suitable for realistic application
programs on existing hardware. We present an
implementation technique based on a special-purpose
software transactional memory system. To reduce the
overhead, the technique exploits properties of managed,
object-oriented programming languages as well as
intraprocedural static analyses and uses field-level
granularity locking in combination with transactional
I/O to provide good scaling properties. We implemented
the synchronized-by-default (SBD) approach for the Java
language and evaluated its performance on six programs
from the DaCapo benchmark suite. The evaluation shows
that, compared to explicit synchronization, the SBD
approach has an overhead between 0.4\% and 102\%
depending on the benchmark and the number of threads,
with a mean (geom.) of 23.9\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Moreira:2017:FCR,
author = "Rubens E. A. Moreira and Sylvain Collange and Fernando
Magno Quint{\~a}o Pereira",
title = "Function Call Re-Vectorization",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "313--326",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018751",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "Programming languages such as C for CUDA, OpenCL or
ISPC have contributed to increase the programmability
of SIMD accelerators and graphics processing units.
However, these languages still lack the flexibility
offered by low-level SIMD programming on explicit
vectors. To close this expressiveness gap while
preserving performance, this paper introduces the
notion of Function Call Re-Vectorization (CREV). CREV allows changing
the dimension of vectorization during the execution of
a kernel, exposing it as a nested parallel kernel call.
CREV affords programmability close to dynamic
parallelism, a feature that allows the invocation of
kernels from inside kernels, but at much lower cost. In
this paper, we present a formal semantics of CREV, and
an implementation of it on the ISPC compiler. We have
used CREV to implement some classic algorithms,
including string matching, depth first search and
Bellman-Ford, with minimum effort. These algorithms,
once compiled by ISPC to Intel-based vector
instructions, are as fast as state-of-the-art
implementations, yet much simpler. Thus, CREV gives
developers the elegance of dynamic parallelism, and the
performance of explicit SIMD programming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Rajbhandari:2017:OFI,
author = "Samyam Rajbhandari and Fabrice Rastello and Karol
Kowalski and Sriram Krishnamoorthy and P. Sadayappan",
title = "Optimizing the Four-Index Integral Transform Using
Data Movement Lower Bounds Analysis",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "327--340",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018771",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The four-index integral transform is a fundamental and
computationally demanding calculation used in many
computational chemistry suites such as NWChem. It
transforms a four-dimensional tensor from one basis to
another. This transformation is most efficiently
implemented as a sequence of four tensor contractions
that each contract a four-dimensional tensor with a
two-dimensional transformation matrix. Differing
degrees of permutation symmetry in the intermediate and
final tensors in the sequence of contractions cause
intermediate tensors to be much larger than the final
tensor and limit the number of electronic states in the
modeled systems. Loop fusion, in conjunction with
tiling, can be very effective in reducing the total
space requirement, as well as data movement. However,
the large number of possible choices for loop fusion
and tiling, and data/computation distribution across a
parallel system, make it challenging to develop an
optimized parallel implementation for the four-index
integral transform. We develop a novel approach to
address this problem, using lower bounds modeling of
data movement complexity. We establish relationships
between available aggregate physical memory in a
parallel computer system and ineffective fusion
configurations, enabling their pruning and consequent
identification of effective choices and a
characterization of optimality criteria. This work has
resulted in the development of a significantly improved
implementation of the four-index transform that enables
higher performance and the ability to model larger
electronic systems than the current implementation in
the NWChem quantum chemistry software suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Steele:2017:UBP,
author = "Guy L. {Steele, Jr.} and Jean-Baptiste Tristan",
title = "Using Butterfly-Patterned Partial Sums to Draw from
Discrete Distributions",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "341--355",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018757",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We describe a SIMD technique for drawing values from
multiple discrete distributions, such as sampling from
the random variables of a mixture model, that avoids
computing a complete table of partial sums of the
relative probabilities. A table of alternate
(``butterfly-patterned'') form is faster to compute,
making better use of coalesced memory accesses; from
this table, complete partial sums are computed on the
fly during a binary search. Measurements using CUDA 7.5
on an NVIDIA Titan Black GPU show that this technique
makes an entire machine-learning application that uses
a Latent Dirichlet Allocation topic model with 1024
topics about 13\% faster (when using
single-precision floating-point data) or about 35\%
faster (when using double-precision floating-point
data) than doing a straightforward matrix transposition
after using coalesced accesses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
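The baseline technique the Steele--Tristan abstract improves on is sampling by partial sums plus binary search, sketched minimally below. The paper's contribution, not shown here, is replacing the complete per-distribution partial-sum table with a butterfly-patterned table whose partial sums are completed on the fly during the search.

#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

// Draw an index i with probability weights[i] / sum(weights).
int draw(const std::vector<double>& weights, std::mt19937& rng) {
  std::vector<double> partial(weights.size());
  std::partial_sum(weights.begin(), weights.end(), partial.begin());
  std::uniform_real_distribution<double> u(0.0, partial.back());
  // Binary search for the first partial sum exceeding the uniform variate.
  return static_cast<int>(
      std::upper_bound(partial.begin(), partial.end(), u(rng)) -
      partial.begin());
}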
@Article{Basin:2017:KKV,
author = "Dmitry Basin and Edward Bortnikov and Anastasia
Braginsky and Guy Golan-Gueta and Eshcar Hillel and
Idit Keidar and Moshe Sulamy",
title = "{KiWi}: a Key--Value Map for Scalable Real-Time
Analytics",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "357--369",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018761",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern big data processing platforms employ huge
in-memory key--value (KV) maps. Their applications
simultaneously drive high-rate data ingestion and
large-scale analytics. These two scenarios expect
KV-map implementations that scale well with both
real-time updates and large atomic scans triggered by
range queries. We present KiWi, the first atomic KV-map
to efficiently support simultaneous large scans and
real-time access. The key to achieving this is treating
scans as first-class citizens, and organizing the data
structure around them. KiWi provides wait-free scans,
whereas its put operations are lightweight and
lock-free. It optimizes memory management jointly with
data structure access. We implement KiWi and compare it
to state-of-the-art solutions. Compared to other
KV-maps providing atomic scans, KiWi performs either
long scans or concurrent puts an order of magnitude
faster. Its scans are twice as fast as non-atomic ones
implemented via iterators in the Java skiplist.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Jiang:2017:GAP,
author = "Lin Jiang and Zhijia Zhao",
title = "Grammar-aware Parallelization for Scalable {XPath}
Querying",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "371--383",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018772",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Semi-structured data emerge in many domains,
especially in web analytics and business intelligence.
However, querying such data is inherently sequential
due to the nested structure of input data. Existing
solutions pessimistically enumerate all execution paths
to circumvent dependencies, yielding sub-optimal
performance and limited scalability. This paper
presents GAP, a parallelization scheme that, for the
first time, leverages the grammar of the input data to
boost the parallelization efficiency. GAP leverages
static analysis to infer feasible execution paths for
specific contexts based on the grammar of the
semi-structured data. It can eliminate unnecessary
paths without compromising the correctness. In the
absence of a pre-defined grammar, GAP switches into a
speculative execution mode and uses a potentially
incomplete grammar extracted from prior inputs.
Together, the dual-mode GAP reduces the execution paths
from all paths to a minimum, thereby maximizing the
parallelization efficiency and scalability. The
benefits of path elimination go beyond reducing extra
computation --- it also enables the use of more
efficient data structures, which further improves the
efficiency. An evaluation on a large set of standard
benchmarks with diverse queries shows that GAP yields
significant efficiency increase and boosts the speedup
of the state-of-the-art from 2.9X to 17.6X on a 20-core
machine for a set of 200 queries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Wang:2017:ESC,
author = "Xin Wang and Weihua Zhang and Zhaoguo Wang and Ziyun
Wei and Haibo Chen and Wenyun Zhao",
title = "{Eunomia}: Scaling Concurrent Search Trees under
Contention Using {HTM}",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "385--399",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018752",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "While hardware transactional memory (HTM) has recently
been adopted to construct efficient concurrent search
tree structures, such designs fail to deliver scalable
performance under contention. In this paper, we first
conduct a detailed analysis on an HTM-based concurrent
B+Tree, which uncovers several reasons for excessive
HTM aborts induced by both false and true conflicts
under contention. Based on the analysis, we advocate
Eunomia, a design pattern for search trees which
contains several principles to reduce HTM aborts,
including splitting HTM regions with version-based
concurrency control to reduce HTM working sets,
partitioned data layout to reduce false conflicts,
proactively detecting and avoiding true conflicts, and
adaptive concurrency control. To validate their
effectiveness, we apply such designs to construct a
scalable concurrent B+Tree using HTM. Evaluation using
key--value store benchmarks on a 20-core HTM-capable
multi-core machine shows that Eunomia leads to 5x--11x
speedup under high contention, while incurring small
overhead under low contention.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Tang:2017:SCM,
author = "Xiongchao Tang and Jidong Zhai and Bowen Yu and
Wenguang Chen and Weimin Zheng",
title = "Self-Checkpoint: an In-Memory Checkpoint Method Using
Less Space and Its Practice on Fault-Tolerant {HPL}",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "401--413",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018745",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Fault tolerance is increasingly important in high
performance computing due to the substantial growth of
system scale and decreasing system reliability.
In-memory/diskless checkpoint has gained extensive
attention as a solution to avoid the IO bottleneck of
traditional disk-based checkpoint methods. However,
applications using previous in-memory checkpoint methods
suffer from reduced available memory space. To provide high
reliability, previous in-memory checkpoint methods
either need to keep two copies of checkpoints to
tolerate failures while updating old checkpoints or
trade performance for space by flushing in-memory
checkpoints into disk. In this paper, we propose a
novel in-memory checkpoint method, called
self-checkpoint, which can not only achieve the same
reliability of previous in-memory checkpoint methods,
but also increase the available memory space for
applications by almost 50\%. To validate our method, we
apply the self-checkpoint to an important problem,
fault tolerant HPL. We implement a scalable and fault
tolerant HPL based on this new method, called SKT-HPL,
and validate it on two large-scale systems.
Experimental results with 24,576 processes show that
SKT-HPL achieves over 95\% of the performance of the
original HPL. Compared to the state-of-the-art
in-memory checkpoint method, it improves the available
memory size by 47\% and the performance by 5\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Wu:2017:SDC,
author = "Panruo Wu and Nathan DeBardeleben and Qiang Guan and
Sean Blanchard and Jieyang Chen and Dingwen Tao and Xin
Liang and Kaiming Ouyang and Zizhong Chen",
title = "Silent Data Corruption Resilient Two-sided Matrix
Factorizations",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "415--427",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3018750",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents an algorithm based fault tolerance
method to harden three two-sided matrix factorizations
against soft errors: reduction to Hessenberg form,
tridiagonal form, and bidiagonal form. These two sided
factorizations are usually the prerequisites to
computing eigenvalues/eigenvectors and singular value
decomposition. Algorithm based fault tolerance has been
shown to work on three main one-sided matrix
factorizations: LU, Cholesky, and QR, but extending it
to cover two sided factorizations is non-trivial
because there are no obvious {\it offline, problem}
specific maintenance of checksums. We thus develop an
{\it online, algorithm} specific checksum scheme and
show how to systematically adapt the two sided
factorization algorithms used in LAPACK and ScaLAPACK
packages to introduce the algorithm based fault
tolerance. The resulting ABFT scheme can detect and
correct arithmetic errors {\it continuously} during the
factorizations that allow timely error handling.
Detailed analysis and experiments are conducted to show
the cost and the gain in resilience. We demonstrate
that our scheme covers a significant portion of the
operations of the factorizations. Our checksum scheme
achieves high error detection coverage and error
correction coverage compared to the state of the art,
with low overhead and high scalability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
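For context, the classic offline, one-sided ABFT checksum idea, which the Wu et al. abstract says does not carry over directly to two-sided factorizations: keep a checksum row of column sums and re-derive it to detect silent corruption. This is a textbook sketch only; the paper's online, algorithm-specific scheme is different.

#include <cmath>
#include <vector>

// Matrix stored row-major with n data rows plus one checksum row at row
// index n, so the vector holds (n+1)*n doubles. Element (n, j) should
// equal the sum of column j; a mismatch flags a silent corruption.
bool verify_checksums(const std::vector<double>& a, int n,
                      double tol = 1e-9) {
  for (int j = 0; j < n; ++j) {
    double sum = 0.0;
    for (int i = 0; i < n; ++i) sum += a[i * n + j];
    if (std::fabs(sum - a[n * n + j]) > tol)
      return false;  // corruption detected in column j
  }
  return true;
}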
@Article{Arbel-Raviv:2017:PRD,
author = "Maya Arbel-Raviv and Trevor Brown",
title = "{Poster}: Reuse, don't Recycle: Transforming
Algorithms that Throw Away Descriptors",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "429--430",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019035",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Lock-free algorithms guarantee progress by having
threads help one another. Complex lock-free operations
facilitate helping by creating descriptor objects that
describe how other threads should help them. In many
lock-free algorithms, a new descriptor is allocated for
each operation. After an operation completes, its
descriptor must be reclaimed by a memory reclamation
scheme. Allocating and reclaiming descriptors
introduces significant space and time overhead. We
present a transformation for a class of lock-free
algorithms that allows each thread to efficiently reuse
a single descriptor. Experiments on a variety of
workloads show that our transformation yields
significant improvements over implementations that
reclaim descriptors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Balaji:2017:PAP,
author = "Vignesh Balaji and Dhruva Tirumala and Brandon Lucia",
title = "{Poster}: an Architecture and Programming Model for
Accelerating Parallel Commutative Computations via
Privatization",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "431--432",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019030",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Synchronization and data movement are the key
impediments to efficient parallel execution. To
ensure that data shared by multiple threads remain
consistent, the programmer must use synchronization
(e.g., mutex locks) to serialize threads' accesses to
data. This limits parallelism because it forces threads
to sequentially access shared resources. Additionally,
systems use cache coherence to ensure that processors
always operate on the most up-to-date version of a
value even in the presence of private caches. Coherence
protocol implementations cause processors to serialize
their accesses to shared data, further limiting
parallelism and performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Bhattacharyya:2017:PHE,
author = "Arnamoy Bhattacharyya and Mike Dai Wang and Mihai
Burcea and Yi Ding and Allen Deng and Sai Varikooty and
Shafaaf Hossain and Cristiana Amza",
title = "{Poster}: {HythTM}: Extending the Applicability of
{Intel TSX} Hardware Transactional Support",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "433--434",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019027",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this work, we introduce and experimentally evaluate
a new hybrid software-hardware Transactional Memory
prototype based on Intel's Haswell TSX architecture.
Our prototype extends the applicability of the existing
hardware support for TM by interposing a hybrid
fall-back layer before the sequential, big-lock
fall-back path, used by standard TSX-supported
solutions in order to guarantee progress. In our
experimental evaluation we use SynQuake, a realistic
game benchmark modeled after Quake. Our results show
that our hybrid transactional system, which we call
HythTM, is able to reduce the number of transactions
that go to the sequential software layer, hence
avoiding hardware transaction aborts and loss of
parallelism. HythTM improves application throughput
and scalability by up to 5.05x when compared to the
hardware TM with a sequential fall-back path.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
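The standard TSX pattern that HythTM extends is a hardware transaction with a single-lock fallback. A hedged sketch using the RTM intrinsics from immintrin.h (compile with -mrtm on RTM-capable hardware); HythTM's contribution, the hybrid software layer interposed before this sequential path, is not shown.

#include <atomic>
#include <immintrin.h>
#include <mutex>

std::mutex big_lock;                 // sequential big-lock fall-back path
std::atomic<bool> lock_held{false};  // lets transactions subscribe to the lock

template <typename F>
void atomic_region(F&& body) {
  unsigned status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    if (lock_held.load()) _xabort(0xff);  // fall-back path is active
    body();
    _xend();  // commit the hardware transaction
    return;
  }
  // Hardware transaction aborted: take the big lock and run sequentially.
  std::lock_guard<std::mutex> g(big_lock);
  lock_held.store(true);
  body();
  lock_held.store(false);
}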
@Article{Chowdhury:2017:PPE,
author = "Rezaul Chowdhury and Pramod Ganapathi and Yuan Tang
and Jesmin Jahan Tithi",
title = "{Poster}: Provably Efficient Scheduling of
Cache-Oblivious Wavefront Algorithms",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "435--436",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019031",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Standard cache-oblivious recursive divide-and-conquer
algorithms for evaluating dynamic programming
recurrences have optimal serial cache complexity but
often have lower parallelism compared with iterative
wavefront algorithms due to artificial dependencies
among subtasks. Very recently cache-oblivious recursive
wavefront (COW) algorithms have been introduced which
do not have any artificial dependencies. Though COW
algorithms are based on fork-join primitives, they
extensively use atomic operations, and as a result,
performance guarantees provided by state-of-the-art
schedulers for programs with fork-join primitives do
not apply. In this work, we show how to systematically
transform standard cache-oblivious recursive
divide-and-conquer algorithms into recursive wavefront
algorithms to achieve optimal parallel cache complexity
and high parallelism under state-of-the-art schedulers
for fork-join programs. Unlike COW algorithms these new
algorithms do not use atomic operations. Instead, they
use closed-form formulas to compute at what time each
recursive function must be launched in order to achieve
high parallelism without losing cache performance. The
resulting implementations are arguably much simpler
than implementations of known COW algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Cohen:2017:PST,
author = "Nachshon Cohen and Maurice Herlihy and Erez Petrank
and Elias Wald",
title = "{Poster}: State Teleportation via Hardware
Transactional Memory",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "437--438",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019026",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "State teleportation is a new technique for exploiting
hardware transactional memory (HTM) to improve existing
synchronization and memory management schemes for
highly-concurrent data structures. When applied to
fine-grained locking, a thread holding the lock for a
node launches a hardware transaction that traverses
multiple successor nodes, acquires the lock for the
last node reached, and releases the lock on the
starting node, skipping lock acquisitions for
intermediate nodes. When applied to lock-free data
structures, a thread visiting a node protected by a
hazard pointer launches a hardware transaction that
traverses multiple successor nodes, and publishes the
hazard pointer only for the last node reached, skipping
the memory barriers needed to publish intermediate
hazard pointers. Experimental results show that these
applications of state teleportation can substantially
increase the performance of both lock-based and
lock-free data structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
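The baseline that state teleportation accelerates is hand-over-hand (lock-coupling) traversal, where each node's lock is acquired before the predecessor's is released. A minimal sketch of that baseline only; teleportation replaces a run of these per-node acquisitions with one hardware transaction that jumps several nodes ahead.

#include <mutex>

struct Node {
  int key;
  Node* next;
  std::mutex lock;
};

// Hand-over-hand search in a sorted list; head is assumed to be a
// sentinel with key smaller than any searched key. Returns the last
// node with key < k, still locked; the caller must unlock it.
Node* find_locked(Node* head, int k) {
  Node* cur = head;
  cur->lock.lock();
  while (cur->next && cur->next->key < k) {
    Node* nxt = cur->next;
    nxt->lock.lock();    // acquire successor before releasing predecessor
    cur->lock.unlock();
    cur = nxt;
  }
  return cur;
}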
@Article{Dai:2017:PII,
author = "Dong Dai and Wei Zhang and Yong Chen",
title = "{Poster}: {IOGP}: an Incremental Online Graph
Partitioning for Large-Scale Distributed Graph
Databases",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "439--440",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019037",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large-scale graphs are becoming critical in various
domains such as social networks, scientific
applications, knowledge discovery, and even system software.
Many of those use cases require large-scale
high-performance graph databases, which are designed
for serving continuous updates from the clients, and at
the same time, answering complex queries towards the
current graph in an on-line manner. Those operations in
graph databases, also referred to as OLTP (online
transaction processing) operations, need specific
design and implementation in graph partitioning
algorithms. In this study, we designed an incremental
online graph partitioning (IOGP) algorithm, optimized for OLTP
workloads. It is designed to achieve better locality,
generate balanced partitions, and increase the
parallelism for accessing hotspots of the graph. Our
evaluation results on both real-world and synthetic
graphs, in both simulation and on a real system, confirm
better performance on graph queries (as much as 2X)
with small overheads during graph insertion (less than
10\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Firoz:2017:PDC,
author = "Jesun Shariar Firoz and Thejaka Amila Kanewala and
Marcin Zalewski and Martina Barnas and Andrew
Lumsdaine",
title = "{Poster}: Distributed Control: The Benefits of
Eliminating Global Synchronization via Effective
Scheduling",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "441--442",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019036",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In distributed computing, parallel overheads such as
\emph{synchronization overhead} may hinder performance.
We introduce the idea of \emph{Distributed Control}
(DC) where global synchronization is reduced to
\emph{termination detection} and each worker proceeds
ahead optimistically, based on the local knowledge of
the global computation. To avoid ``wasted'' work, DC
relies on local work prioritization. However, the work
order obtained by local prioritization is susceptible
to interference from the runtime. We show that
employing effective scheduling policies and
optimizations in the runtime, in conjunction with
eliminating global barriers, improves performance in
two graph applications: single-source shortest paths
and connected components.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Jo:2017:PMA,
author = "Gangwon Jo and Jaehoon Jung and Jiyoung Park and
Jaejin Lee",
title = "{Poster}: {MAPA}: an Automatic Memory Access Pattern
Analyzer for {GPU} Applications",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "443--444",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019034",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Various existing optimization and memory consistency
management techniques for GPU applications rely on
memory access patterns of kernels. However, they suffer
from poor practicality because they require explicit
user interventions to extract kernel memory access
patterns. This paper proposes an automatic
memory-access-pattern analysis framework called MAPA.
MAPA is based on a source-level analysis technique
derived from traditional symbolic analyses and a
run-time pattern selection technique. The experimental
results show that MAPA properly analyzes 116 real-world
OpenCL kernels from Rodinia and Parboil.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Li:2017:PCO,
author = "Shigang Li and Yunquan Zhang and Torsten Hoefler",
title = "{Poster}: Cache-Oblivious {MPI} All-to-All
Communications on Many-Core Architectures",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "445--446",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019025",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the many-core era, the performance of MPI
collectives is more dependent on the intra-node
communication component. However, the communication
algorithms generally inherit from the inter-node
version and ignore the cache complexity. We propose
cache-oblivious algorithms for MPI all-to-all
operations, in which data blocks are copied into the
receive buffers in Morton order to exploit data
locality. Experimental results on different many-core
architectures show that our cache-oblivious
implementations significantly outperform the naive
implementations based on shared heap and the highly
optimized MPI libraries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
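The Morton-order copy in the Li--Zhang--Hoefler abstract relies on the standard Z-order index mapping, sketched below; the MPI machinery, buffer layout, and per-architecture tuning are the paper's. Visiting the (source rank, destination rank) grid in this recursive Z pattern is what yields the cache-oblivious locality.

#include <cstdint>

// Interleave the bits of x and y: morton(x, y) enumerates a 2-D grid
// in Z-order, so consecutive indices touch nearby blocks in memory.
uint32_t morton(uint16_t x, uint16_t y) {
  auto spread = [](uint32_t v) {  // spread 16 bits into the even positions
    v = (v | (v << 8)) & 0x00FF00FFu;
    v = (v | (v << 4)) & 0x0F0F0F0Fu;
    v = (v | (v << 2)) & 0x33333333u;
    v = (v | (v << 1)) & 0x55555555u;
    return v;
  };
  return spread(x) | (spread(y) << 1);
}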
@Article{Menon:2017:PAL,
author = "Harshitha Menon and Kavitha Chandrasekar and Laxmikant
V. Kale",
title = "{Poster}: Automated Load Balancer Selection Based on
Application Characteristics",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "447--448",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019033",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many HPC applications require dynamic load balancing
to achieve high performance and system utilization.
Different applications have different characteristics
and hence require different load balancing strategies.
Invocation of a suboptimal load balancing strategy can
lead to inefficient execution. We propose
Meta-Balancer, a framework to automatically decide the
best load balancing strategy. It employs randomized
decision forests, a machine learning method, to learn a
model for choosing the best load balancing strategy for
an application represented by a set of features that
capture the application characteristics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Moscovici:2017:PGF,
author = "Nurit Moscovici and Nachshon Cohen and Erez Petrank",
title = "{Poster}: a {GPU}-Friendly Skiplist Algorithm",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "449--450",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019032",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a design for a fine-grained lock-based
skiplist optimized for Graphics Processing Units
(GPUs). While GPUs are often used to accelerate
streaming parallel computations, it remains a
significant challenge to efficiently offload concurrent
computations with more complicated data-irregular
access and fine-grained synchronization. Natural
building blocks for such computations would be
concurrent data structures, such as skiplists, which
are widely used in general purpose computations. Our
design utilizes array-based nodes which are accessed
and updated by warp-cooperative functions, thus taking
advantage of the fact that GPUs are most efficient when
memory accesses are coalesced and execution divergence
is minimized. The proposed design has been implemented,
and measurements demonstrate improved performance of up
to 2.6x over existing GPU skiplist designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Ramalhete:2017:PPM,
author = "Pedro Ramalhete and Andreia Correia",
title = "{Poster}: Poor Man's {URCU}",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "451--452",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019021",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "RCU is, among other things, a well known mechanism for
memory reclamation that is meant to be used in
languages without an automatic Garbage Collector,
unfortunately, it requires operating system support,
which is currently provided only in Linux. An
alternative is to use Userspace RCU (URCU) which has
two variants that can be deployed on other operating
systems, named \emph{Memory Barrier} and \emph{Bullet
Proof}. We present a novel algorithm that implements
the three core APIs of RCU: \texttt{rcu\_read\_lock()},
\texttt{rcu\_read\_unlock()}, and
\texttt{synchronize\_rcu()}. Our algorithm uses one
mutual exclusion lock and two reader-writer locks with
\texttt{trylock()} capabilities, which means it does
not need a language with a memory model or atomics API,
and as such, it can be easily implemented in almost any
language, regardless of the underlying CPU
architecture, or operating system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
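The abstract's recipe, one mutex plus two reader-writer locks with trylock, admits the following C++ reading. This is a sketch of our interpretation, not the authors' code: readers take a shared lock on the first rwlock, diverting to the second when a writer holds it, and synchronize_rcu() drains each rwlock in turn, which waits out every reader active when it began.

#include <mutex>
#include <shared_mutex>

std::mutex gp_lock;        // serializes grace-period (writer) requests
std::shared_mutex rw[2];

// Returns which rwlock the reader holds; pass it to rcu_read_unlock().
int rcu_read_lock() {
  if (rw[0].try_lock_shared()) return 0;  // writer holds rw[0]: divert
  rw[1].lock_shared();
  return 1;
}

void rcu_read_unlock(int idx) { rw[idx].unlock_shared(); }

void synchronize_rcu() {
  std::lock_guard<std::mutex> g(gp_lock);
  // Exclusively acquiring and releasing each rwlock drains all readers
  // that entered before this grace period started.
  for (auto& l : rw) { l.lock(); l.unlock(); }
}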
@Article{Ramalhete:2017:PWF,
author = "Pedro Ramalhete and Andreia Correia",
title = "{Poster}: a Wait-Free Queue with Wait-Free Memory
Reclamation",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "453--454",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019022",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Queues are a widely deployed data structure. They are
used extensively in many multi-threaded applications,
or as a communication mechanism between threads or
processes. We propose a new linearizable
multi-producer-multi-consumer queue we named Turn
queue, with wait-free progress bounded by the number of
threads, and with wait-free bounded memory reclamation.
Its main characteristics are: a simple algorithm that
does no memory allocation apart from creating the node
that is placed in the queue, a new wait-free consensus
algorithm using only the atomic instruction
compare-and-swap (CAS), and easy integration with
other algorithms for either the enqueue or the dequeue
method.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Tang:2017:PSS,
author = "Yuan Tang and Ronghui You",
title = "{Poster}: {STAR} (Space-Time Adaptive and Reductive)
Algorithms for Real-World Space-Time Optimality",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "455--456",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019029",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It's important to hit a space-time balance for a
real-world algorithm to achieve high performance on
modern shared-memory multi-core or many-core systems.
However, a large class of dynamic programs with more
than $ O(1) $ dependency achieve optimality either in
space or time, but not both. In the literature, the
problem is known as the fundamental space-time
tradeoff. By properly exploiting the runtime system,
we show that our STAR (Space-Time Adaptive and
Reductive) technique can help these dynamic programs to
achieve sublinear parallel time bounds while still
maintaining work-, space-, and cache-optimality in a
processor- and cache-oblivious fashion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Wu:2017:PRP,
author = "Mingyu Wu and Haibing Guan and Binyu Zang and Haibo
Chen",
title = "{Poster}: Recovering Performance for Vector-based
Machine Learning on Managed Runtime",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "457--458",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019039",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Zhang:2017:PPC,
author = "Minjia Zhang and Swarnendu Biswas and Michael D.
Bond",
title = "{Poster}: On the Problem of Consistency Exceptions in
the Context of Strong Memory Models",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "459--460",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019024",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This work considers the problem of availability for
memory models that throw consistency exceptions. We
define a new memory model called RIx based on isolation
of synchronization-free regions and a new approach
called Avalon that provides RIx. Our evaluation shows
that Avalon and RIx substantially reduce consistency
exceptions, by 1-3 orders of magnitude and sometimes
eliminate them completely. Furthermore, our exploration
provides new, compelling points in the
performance-availability tradeoff space.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Zhao:2017:PIH,
author = "Yue Zhao and Chunhua Liao and Xipeng Shen",
title = "{Poster}: an Infrastructure for {HPC} Knowledge
Sharing and Reuse",
journal = j-SIGPLAN,
volume = "52",
number = "8",
pages = "461--462",
month = aug,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3155284.3019023",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:12 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a prototype infrastructure for
addressing the barriers to effective accumulation,
sharing, and reuse of the various types of knowledge
in high-performance parallel computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '17 conference proceedings.",
}
@Article{Shen:2017:BGB,
author = "Xipeng Shen",
title = "Bridging the gap between memory performance and
massive parallelism: the critical role of programming
systems innovations (keynote)",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "1--1",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This talk examines some trends in the modern
developments of memory systems and their relations with
the massive parallelism in processors and applications.
It then draws on some recent work on GPUs to explain the
important role of programming systems in bridging the
gap; it particularly emphasizes the importance of
innovations for enabling better software
controllability, more software elasticity, and
inter-thread data locality enhancements. The talk
further discusses the implications brought to
programming systems by the increasingly blurred
boundaries among memory, storage, and processing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Bruno:2017:NPG,
author = "Rodrigo Bruno and Lu{\'\i}s Picciochi Oliveira and
Paulo Ferreira",
title = "{NG2C}: pretenuring garbage collection with dynamic
generations for {HotSpot} big data applications",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "2--13",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092272",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Big Data applications suffer from unpredictable and
unacceptably high pause times due to Garbage Collection
(GC). This is the case in latency-sensitive
applications such as on-line credit-card fraud
detection, graph-based computing for analysis on social
networks, etc. Such pauses compromise latency
requirements of the whole application stack and result
from applications' aggressive buffering/caching of
data, exposing an ill-suited GC design, which assumes
that most objects will die young and does not consider
that applications hold large amounts of middle-lived
data in memory. To avoid such pauses, we propose NG2C,
a new GC algorithm that combines pretenuring with
user-defined dynamic generations. By being able to
allocate objects into different generations, NG2C is
able to group objects with similar lifetime profiles in
the same generation. By allocating objects with similar
lifetime profiles close to each other, i.e. in the same
generation, we avoid object promotion (copying between
generations) and heap fragmentation (which leads to
heap compactions) both responsible for most of the
duration of HotSpot GC pause times. NG2C is implemented
for the OpenJDK 8 HotSpot Java Virtual Machine, as an
extension of the Garbage First GC. We evaluate NG2C
using Cassandra, Lucene, and GraphChi with three
different GCs: Garbage First (G1), Concurrent Mark
Sweep (CMS), and NG2C. Results show that NG2C decreases
the worst observable GC pause time by up to 94.8\% for
Cassandra, 85.0\% for Lucene and 96.45\% for GraphChi,
when compared to current collectors (G1 and CMS). In
addition, NG2C has no negative impact on application
throughput or memory usage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Yang:2017:TAA,
author = "Albert Mingkun Yang and Tobias Wrigstad",
title = "Type-assisted automatic garbage collection for
lock-free data structures",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "14--24",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092274",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce Isolde, an automatic garbage collection
scheme designed specifically for managing memory in
lock-free data structures, such as stacks, lists, maps
and queues. Isolde exists as a plug-in memory manager,
designed to sit on top of another memory manager and
use its allocator and reclaimer (if one exists). Isolde
treats a lock-free data structure as a logical heap,
isolated from the rest of the program. This allows
garbage collection outside of Isolde to take place
without affecting the lock-free data structure. Isolde
further manages objects allocated on an Isolde heap in a
fully concurrent manner, allowing garbage collection to
incrementally remove garbage without stopping other
threads doing work.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Vrvilo:2017:MDF,
author = "Nick Vrvilo and Lechen Yu and Vivek Sarkar",
title = "A marshalled data format for pointers in relocatable
data blocks",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "25--35",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092276",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As future computing hardware progresses towards
extreme-scale technology, new challenges arise for
addressing heterogeneous compute and memory resources,
for providing application resilience in the presence of
more frequent failures, and for working within strict
energy constraints. While C++ has gained popularity in
recent years within the HPC community, some concepts of
object-oriented program design may be at odds with the
techniques we use to address the challenges of
extreme-scale computing. In this work, we focus on the
challenges related to using aggregate data structures
that include pointer values within a programming model
where the runtime may frequently relocate data, and
traditional serialization techniques are not practical.
We propose and evaluate a marshalled encoding for
relocatable data blocks, and present a C++ library and
other tools to simplify the work of the application
programmer developing new applications or porting
existing applications to such emerging programming
models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Liu:2017:FEM,
author = "Zhengyang Liu and John Criswell",
title = "Flexible and efficient memory object metadata",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "36--46",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092268",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Compiler-based tools can protect software from attack
and find bugs within programs. To support programs
written in type-unsafe languages such as C, such tools
need to add code into a program that must, at run-time,
take a pointer into a memory object and locate metadata
for that memory object. Current methods of locating
metadata are either flexible (supporting metadata of
varying sizes) at the expense of speed and scalability
or are fast (e.g., by using shadow tables) at the cost
of flexibility (metadata is small and must always be
the same size). This paper presents a new method of
attaching metadata to memory objects, named Padding
Area MetaData (PAMD), that is both flexible and
efficient. Metadata can be any size, and different
memory objects can have different sized metadata. While
flexible, the algorithm for finding the metadata given
a pointer into the memory object takes constant time.
Our method extends Baggy Bounds with Accurate Checking
(BBAC) which attaches constant-sized metadata to memory
objects for performing precise dynamic bounds checks.
Our design supports variable-sized metadata, and our
implementation supports larger programs. We evaluated
the performance and scalability of PAMD using dynamic
bounds checking as an exemplar of our method. Our
results show that our method adds at most 33\% overhead
to an identical dynamic bounds checking tool that
trades precision for performance by using a simple
shadow table. Our results also show that our method,
while having the same flexibility as splay trees,
performs significantly faster and scales better as a
program allocates more memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Vorobyov:2017:SSE,
author = "Kostyantyn Vorobyov and Julien Signoles and Nikolai
Kosmatov",
title = "Shadow state encoding for efficient monitoring of
block-level properties",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "47--58",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092269",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Memory shadowing associates addresses from an
application's memory to values stored in a disjoint
memory space called shadow memory. At runtime shadow
values store metadata about application memory
locations they are mapped to. Shadow state encodings
--- the structure of shadow values and their
interpretation --- vary across different tools.
Encodings used by the state-of-the-art monitoring tools
have been proven useful for tracking memory at a
byte-level, but cannot address properties related to
memory block boundaries. Tracking block boundaries is
however crucial for spatial memory safety analysis,
where a spatial violation, such as an out-of-bounds access,
may dereference an allocated location belonging to an
adjacent block or a different struct member. This paper
describes two novel shadow state encodings which
capture block-boundary-related properties. These
encodings have been implemented in E-ACSL --- a runtime
verification tool for C programs. Initial experiments
involving checking validity of pointer and array
accesses in computationally intensive runs of programs
selected from SPEC CPU benchmarks demonstrate runtime
and memory overheads comparable to state-of-the-art
memory debuggers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Dashti:2017:AMM,
author = "Mohammad Dashti and Alexandra Fedorova",
title = "Analyzing memory management methods on integrated
{CPU--GPU} systems",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "59--69",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092256",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Heterogeneous systems that integrate a multicore CPU
and a GPU on the same die are ubiquitous. On these
systems, both the CPU and GPU share the same physical
memory as opposed to using separate memory dies.
Although integration eliminates the need to copy data
between the CPU and the GPU, arranging transparent
memory sharing between the two devices can carry large
overheads. Memory on CPU/GPU systems is typically
managed by a software framework such as OpenCL or CUDA,
which includes a runtime library and communicates with
a GPU driver. These frameworks offer a range of memory
management methods that vary in ease of use,
consistency guarantees and performance. In this study,
we analyze some of the common memory management methods
of the most widely used software frameworks for
heterogeneous systems: CUDA, OpenCL 1.2, OpenCL 2.0,
and HSA, on NVIDIA and AMD hardware. We focus on
performance/functionality trade-offs, with the goal of
exposing their performance impact and simplifying the
choice of memory management methods for programmers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Giles:2017:CCH,
author = "Ellis Giles and Kshitij Doshi and Peter Varman",
title = "Continuous checkpointing of {HTM} transactions in
{NVM}",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "70--81",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092270",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper addresses the challenges of coupling byte
addressable non-volatile memory (NVM) and hardware
transaction memory (HTM) in high-performance
transaction processing. We first show that HTM
transactions can be ordered using existing processor
instructions without any hardware changes. In contrast,
existing solutions posit changes to HTM mechanisms in
the form of special instructions or modified
functionality. We exploit the ordering mechanism to
design a novel persistence method that decouples HTM
concurrency from back-end NVM operations. Failure
atomicity is achieved using redo logging coupled with
aliasing to guard against mistimed cache evictions. Our
algorithm uses efficient lock-free mechanisms with
bounded static memory requirements. We evaluated our
approach using both micro-benchmarks and benchmarks
in the STAMP suite, and showed that it compares well
with standard (volatile) HTM transactions. We also
showed that it yields significant gains in throughput
and latency in comparison with persistent transactional
locking.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Peng:2017:RTD,
author = "Ivy Bo Peng and Roberto Gioiosa and Gokcen Kestor and
Pietro Cicotti and Erwin Laure and Stefano Markidis",
title = "{RTHMS}: a tool for data placement on hybrid memory
system",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "82--91",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092273",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Traditional scientific and emerging data analytics
applications require fast, power-efficient, large, and
persistent memories. Combining all these
characteristics within a single memory technology is
expensive and hence future supercomputers will feature
different memory technologies side-by-side. However, it
is a complex task to program hybrid-memory systems and
to identify the best object-to-memory mapping. We
envision that programmers will probably resort to using
default configurations that only require minimal
interventions on the application code or system
settings. In this work, we argue that intelligent,
fine-grained data placement can achieve higher
performance than default setups. We present an
algorithm for data placement on hybrid-memory systems.
Our algorithm is based on a set of single-object
allocation rules and global data placement decisions.
We also present RTHMS, a tool that implements our
algorithm and provides recommendations about the
object-to-memory mapping. Our experiments on a hybrid
memory system, an Intel Knights Landing processor with
DRAM and HBM, show that RTHMS is able to achieve higher
performance than the default configuration. We believe
that RTHMS will be a valuable tool for programmers
working on complex hybrid-memory systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Kanvar:2017:WNG,
author = "Vini Kanvar and Uday P. Khedker",
title = "``{What}'s in a name?'' going beyond allocation site
names in heap analysis",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "92--103",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092267",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A points-to analysis computes a sound abstraction of
heap memory, conventionally using a name-based
abstraction that summarizes runtime memory by grouping
locations using the names of allocation sites: All
concrete heap locations allocated by the same statement
are grouped together. The locations in the same group
are treated alike, i.e., a pointer to any one location
of the group is assumed to point to every location in
the group leading to an over-approximation of points-to
relations. We propose an access-based abstraction that
partitions each name-based group of locations into
equivalence classes at every program point using an
additional criterion of the sets of access paths
(chains of pointer indirections) reaching the locations
in the memory. The intuition is that the locations that
are both allocated and accessed alike should be grouped
into the same equivalence class. Since the access paths
in the memory could reach different locations at
different program points, our groupings change flow
sensitively unlike the name-based groupings. This
creates a more precise view of the memory.
Theoretically, it is strictly more precise than the
name-based abstraction except in some trivial cases;
practically it is far more precise. Our empirical
measurements show the benefits of our tool Access-Based
Heap Analyzer (ABHA) on SPEC CPU 2006 and heap
manipulating SV-COMP benchmarks. ABHA, which is field-,
flow-, and context-sensitive, scales to 20 kLoC and can
improve the precision even up to 99\% (in terms of the
number of aliases). Additionally, ABHA allows any
user-defined summarization of an access path to be
plugged in; we have implemented and evaluated four
summarization techniques. ABHA can also act as a
front-end to TVLA, a parametrized shape analyzer, in
order to automate its parametrization by generating
predicates that capture the program behaviour more
accurately.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Fang:2017:RHF,
author = "Bin Fang and Mihaela Sighireanu",
title = "A refinement hierarchy for free list memory
allocators",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "104--114",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092275",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Existing implementations of dynamic memory allocators
(DMA) employ a large spectrum of policies and
techniques. The formal specifications of these
techniques are quite complicated in isolation and very
complex when combined. Therefore, the formal reasoning
about a specific DMA implementation is difficult for
automatic tools and mostly single-use. This paper
proposes a solution to this problem by providing formal
models for a full class of DMA, the free list class. To
obtain manageable formal reasoning and reusable formal
models, we organize these models in a hierarchy ranked
by refinement relations. We prove the soundness of
models and refinement relations using an off-the-shelf
theorem prover. We demonstrate that our hierarchy is a
basis for an algorithm theory for the class of free
list DMA: it abstracts various existing implementations
of DMA and leads to new DMA implementations. We
illustrate its application to model-based code
generation, testing, run-time verification, and static
analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Zhang:2017:ACE,
author = "Minjia Zhang and Swarnendu Biswas and Michael D.
Bond",
title = "Avoiding consistency exceptions under strong memory
models",
journal = j-SIGPLAN,
volume = "52",
number = "9",
pages = "115--127",
month = sep,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156685.3092271",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Shared-memory languages and systems generally provide
weak or undefined semantics for executions with data
races. Prior work has proposed memory consistency
models that ensure well-defined, easy-to-understand
semantics based on region serializability (RS), but the
resulting system may throw a consistency exception in
the presence of a data race. Consistency exceptions can
occur unexpectedly even in well-tested programs,
hurting availability and thus limiting the practicality
of RS-based memory models. To our knowledge, this paper
is the first to consider the problem of availability
for memory consistency models that throw consistency
exceptions. We first extend existing approaches that
enforce RSx, a memory model based on serializability of
synchronization-free regions (SFRs), to avoid region
conflicts and thus consistency exceptions. These new
approaches demonstrate both the potential for and
limitations of avoiding consistency exceptions under
RSx. To improve availability further, we introduce (1)
a new memory model called RIx based on isolation of
SFRs and (2) a new approach called Avalon that provides
RIx. We demonstrate two variants of Avalon that offer
different performance--availability tradeoffs for RIx.
An evaluation on real Java programs shows that this
work's novel approaches are able to reduce consistency
exceptions, thereby improving the applicability of
strong memory consistency models. Furthermore, the
approaches provide compelling points in the
performance--availability tradeoff space for memory
consistency enforcement. RIx and Avalon thus represent
a promising direction for tackling the challenge of
availability under strong consistency models that throw
consistency exceptions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '17 conference proceedings.",
}
@Article{Remy:2017:OEP,
author = "Didier R{\'e}my",
title = "{Ornaments}: exploiting parametricity for safer, more
automated code refactorization and code reuse (invited
talk)",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "1--1",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3127333",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "Inductive datatypes and parametric polymorphism are
two key features introduced in the ML family of
languages, which have already been widely exploited for
structuring programs: Haskell and ML programs are often
more elegant and more correct by construction. Still,
we sometimes need code to be refactored or adapted to
be reused in a slightly different context. While the
type system is considerably helpful in these
situations, by automatically locating type-inconsistent
program points or incomplete pattern matchings, this
process could be made safer and more automated by
further exploiting parametricity. We propose a
posteriori program abstraction as a principle for such
code transformations. We apply this principle to
ornamentation, which is a way to describe changes in
datatype definitions by reorganizing, adding, or dropping
some pieces of data so that functions operating on the
bare definition can be partially and sometimes totally
lifted into functions operating on the ornamented
structure. We view ornamentation as an a posteriori
abstraction of the bare code, called a generic lifting,
which can then be instantiated into a concrete lifting,
meta-reduced, and simplified. Both the source and
target code live in core ML while the lifted code lives
in a meta-language above ML equipped with a limited
form of dependent types needed to capture some
invariants of the generic lifting so that the concrete
lifting can be simplified back into an ML program.
Importantly, the lifted code can be closely related to
the bare code, using logical relations thanks to the
generic lifting detour. Different typical use cases of
ornaments will be shown, and the approach will be
illustrated mainly through examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
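As a concrete flavor of the ornament idea in the abstract above (a
minimal sketch of ours, not code from the talk): lists arise by
ornamenting the natural numbers, decorating each successor with an
element, and the length function forgets that decoration.

    data Nat    = Zero | Suc Nat
    data List a = Nil  | Cons a (List a)

    -- Forgetting the ornament projects a list back to its bare Nat;
    -- conversely, functions on Nat can sometimes be lifted to List.
    len :: List a -> Nat
    len Nil         = Zero
    len (Cons _ xs) = Suc (len xs)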
@Article{Mokhov:2017:AGC,
author = "Andrey Mokhov",
title = "Algebraic graphs with class (functional pearl)",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "2--13",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122956",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The paper presents a minimalistic and elegant approach
to working with graphs in Haskell. It is built on a
rigorous mathematical foundation --- an algebra of
graphs --- that allows us to apply equational reasoning
for proving the correctness of graph transformation
algorithms. Algebraic graphs let us avoid partial
functions typically caused by `malformed graphs' that
contain an edge referring to a non-existent vertex.
This helps to liberate APIs of existing graph libraries
from partial functions. The algebra of graphs can
represent directed, undirected, reflexive and
transitive graphs, as well as hypergraphs, by
appropriately choosing the set of underlying axioms.
The flexibility of the approach is demonstrated by
developing a library for constructing and transforming
polymorphic graphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
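The core of the algebra described in the abstract above is small enough
to sketch (our transcription of the four-constructor embedding with one
possible interpretation; not the paper's full library):

    import qualified Data.Set as Set

    data Graph a = Empty
                 | Vertex a
                 | Overlay (Graph a) (Graph a)  -- union of vertices and edges
                 | Connect (Graph a) (Graph a)  -- overlay plus edges left-to-right
      deriving Show

    -- Interpret a graph expression as its vertex and edge sets.
    toSets :: Ord a => Graph a -> (Set.Set a, Set.Set (a, a))
    toSets Empty         = (Set.empty, Set.empty)
    toSets (Vertex x)    = (Set.singleton x, Set.empty)
    toSets (Overlay g h) = let (vg, eg) = toSets g
                               (vh, eh) = toSets h
                           in (Set.union vg vh, Set.union eg eh)
    toSets (Connect g h) = let (vg, eg) = toSets g
                               (vh, eh) = toSets h
                           in ( Set.union vg vh
                              , Set.unions
                                  [ eg, eh
                                  , Set.fromList [ (x, y) | x <- Set.toList vg
                                                          , y <- Set.toList vh ] ] )

    -- A malformed edge is unrepresentable: every edge's endpoints are,
    -- by construction, vertices of the graph.
    path123 :: Graph Int
    path123 = Connect (Vertex 1) (Vertex 2) `Overlay` Connect (Vertex 2) (Vertex 3)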
@Article{Blazevic:2017:PPP,
author = "Mario Blazevi{\'c} and Jacques L{\'e}gar{\'e}",
title = "Packrats parse in packs",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "14--25",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122958",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a novel but remarkably simple formulation
of formal language grammars in Haskell as functions
mapping a record of production parsers to itself. Thus
formulated, grammars are first-class objects, composable
and reusable. We also provide a simple parser
implementation for them, based on an improved packrat
algorithm. In order to make the grammar manipulation
code reusable, we introduce a set of type classes
mirroring the existing type classes from Haskell base
library, but whose methods have rank-2 types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
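The central formulation above can be demonstrated in miniature (a
sketch of ours using a plain list-of-successes parser rather than the
paper's improved packrat algorithm; all names are illustrative): a
grammar is a function from a record of production parsers to itself,
and tying the knot yields the parsers.

    import Data.Char (isDigit, digitToInt)

    newtype Parser a = Parser { runParser :: String -> [(a, String)] }

    charP :: (Char -> Bool) -> Parser Char
    charP p = Parser $ \s -> case s of
      (c:cs) | p c -> [(c, cs)]
      _            -> []

    orElse :: Parser a -> Parser a -> Parser a
    orElse (Parser f) (Parser g) = Parser $ \s -> f s ++ g s

    -- The record of production parsers for a toy grammar of sums.
    data Arithmetic = Arithmetic { expr :: Parser Int, digit :: Parser Int }

    -- A grammar maps a record of production parsers to itself ...
    grammar :: Arithmetic -> Arithmetic
    grammar g = Arithmetic
      { digit = Parser $ \s ->
          [ (digitToInt c, r) | (c, r) <- runParser (charP isDigit) s ]
      , expr  = Parser (\s ->
          [ (d + e, r2) | (d, r0) <- runParser (digit g) s
                        , (_, r1) <- runParser (charP (== '+')) r0
                        , (e, r2) <- runParser (expr g) r1 ])
          `orElse` digit g
      }

    -- ... and its least fixed point is the parser for each production:
    -- runParser (expr arithmetic) "1+2+3" yields (6, "") first.
    arithmetic :: Arithmetic
    arithmetic = grammar arithmetic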
@Article{Lampropoulos:2017:ORU,
author = "Leonidas Lampropoulos and Antal Spector-Zabusky and
Kenneth Foner",
title = "Ode on a random urn (functional pearl)",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "26--37",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122959",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present the urn, a simple tree-based data structure
that supports sampling from and updating discrete
probability distributions in logarithmic time. We avoid
the usual complexity of traditional self-balancing
binary search trees by not keeping values in a specific
order. Instead, we keep the tree maximally balanced at
all times using a single machine word of overhead: its
size. Urns provide an alternative interface for the
frequency combinator from the QuickCheck library that
allows for asymptotically more efficient sampling from
dynamically-updated distributions. They also facilitate
backtracking in property-based random testing, and can
be applied to such complex examples from the literature
as generating well-typed lambda terms or information
flow machine states, demonstrating significant
speedups.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
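The sampling half of the structure fits in a few lines (a simplified
sketch of ours that omits the paper's insertion, update, and
size-guided balancing; randomRIO comes from the random package):

    import System.Random (randomRIO)

    type Weight = Int

    data Urn a = Leaf Weight a
               | Node Weight (Urn a) (Urn a)  -- caches the total weight below

    weight :: Urn a -> Weight
    weight (Leaf w _)   = w
    weight (Node w _ _) = w

    -- Descend left or right according to an index into the total weight;
    -- the cost is the depth of the tree, logarithmic when balanced.
    index :: Urn a -> Weight -> a
    index (Leaf _ x) _ = x
    index (Node _ l r) i
      | i < weight l = index l i
      | otherwise    = index r (i - weight l)

    sample :: Urn a -> IO a
    sample u = index u <$> randomRIO (0, weight u - 1)

    -- Example: 'a' with weight 1, 'b' with weight 2, 'c' with weight 3.
    example :: Urn Char
    example = Node 6 (Leaf 1 'a') (Node 5 (Leaf 2 'b') (Leaf 3 'c'))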
@Article{Algehed:2017:QLT,
author = "Maximilian Algehed and Koen Claessen and Moa Johansson
and Nick Smallbone",
title = "{QuickSpec}: a lightweight theory exploration tool for
programmers (system demonstration)",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "38--39",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122960",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This document gives the outline of a system
demonstration for the QuickSpec theory exploration
tool.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Braquehais:2017:SDC,
author = "Rudy Braquehais and Colin Runciman",
title = "{Speculate}: discovering conditional equations and
inequalities about black-box functions by reasoning
from test results",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "40--51",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122961",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents Speculate, a tool that
automatically conjectures laws involving conditional
equations and inequalities about Haskell functions.
Speculate enumerates expressions involving a given
collection of Haskell functions, testing to separate
those expressions into apparent equivalence classes.
Expressions in the same equivalence class are used to
conjecture equations. Representative expressions of
different equivalence classes are used to conjecture
conditional equations and inequalities. Speculate uses
lightweight equational reasoning based on term
rewriting to discard redundant laws and to avoid
needless testing. Several applications demonstrate the
effectiveness of Speculate.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Wiegley:2017:UCW,
author = "John Wiegley and Benjamin Delaware",
title = "Using {Coq} to write fast and correct {Haskell}",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "52--62",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122962",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Correctness and performance are often at odds in the
field of systems engineering, either because correct
programs are too costly to write or impractical to
execute, or because well-performing code involves so
many tricks of the trade that formal analysis is unable
to isolate the main properties of the algorithm. As a
prime example of this tension, Coq is an established
proof environment that allows writing correct,
dependently-typed code, but it has been criticized for
exorbitant development times, forcing the developer to
choose between optimal code or tractable proofs. On the
other side of the divide, Haskell has proven itself to
be a capable, well-typed programming environment, yet
easy-to-read, straightforward code must all too often
be replaced by highly optimized variants that obscure
the author's original intention. This paper builds on
the existing Fiat refinement framework to bridge this
divide, demonstrating how to derive a
correct-by-construction implementation that meets (or
exceeds) the performance characteristics of highly
optimized Haskell, starting from a high-level Coq
specification. To achieve this goal, we extend Fiat
with a stateful notion of refinement of abstract data
types and add support for extracting stateful code via
a free monad equipped with an algebra of
heap-manipulating operations. As a case study, we
reimplement a subset of the popular bytestring library,
with little to no loss of performance, while retaining
a high guarantee of program correctness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Vazou:2017:TTP,
author = "Niki Vazou and Leonidas Lampropoulos and Jeff
Polakow",
title = "A tale of two provers: verifying monoidal string
matching in liquid {Haskell} and {Coq}",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "63--74",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122963",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "We demonstrate for the first time that Liquid Haskell,
a refinement type checker for Haskell programs, can be
used for arbitrary theorem proving by verifying a
parallel, monoidal string matching algorithm
implemented in Haskell. We use refinement types to
specify correctness properties, Haskell terms to
express proofs of these properties, and Liquid Haskell
to check the proofs. We evaluate Liquid Haskell as a
theorem prover by replicating our 1428 LoC proof in a
dependently-typed language (Coq --- 1136 LoC). Finally,
we compare both proofs, uncovering the relative
advantages and disadvantages of the two provers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Ekblad:2017:MED,
author = "Anton Ekblad",
title = "A meta-{EDSL} for distributed web applications",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "75--85",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122969",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a domain-specific language for constructing
and configuring web applications distributed across any
number of networked, heterogeneous systems. Our
language is embedded in Haskell, provides a common
framework for integrating components written in
third-party EDSLs, and enables type-safe,
access-controlled communication between nodes, as well
as effortless sharing and movement of functionality
between application components. We give an
implementation of our language and demonstrate its
applicability by using it to implement several
important components of distributed web applications,
including RDBMS integration, load balancing, and
fine-grained sandboxing of untrusted third party code.
The rising popularity of cloud computing and
heterogeneous computer architectures is putting a
strain on conventional programming models, which
commonly assume that one application executes on one
machine, or at best on one out of several identical
machines. With our language, we take the first step
towards a programming model better suited for a
computationally multicultural future.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Dawson:2017:CNS,
author = "Justin Dawson and Mark Grebe and Andy Gill",
title = "Composable network stacks and remote monads",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "86--97",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122968",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Monads and applicative functors are two ways that
Haskell programmers bundle effectful primitives into
effectful program fragments. In this paper, we
investigate using monads and applicative functors to
bundle remote effectful primitives, specifically aiming
to amortize the cost of remote communications using
bundling. We look at several ways of maximizing the
bundling of primitives, drawing from the remote monad
design pattern and the Haxl system, and provide a taxonomy
of mechanisms for amortization, with examples. The
result of this investigation is that monadic fragments
can be efficiently bundled into packets, almost for
free, when given a user-supplied packet transportation
mechanism, and the primitives obey some simple pre- and
post-conditions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
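The amortization idea above can be modeled in miniature (a toy sketch
of ours in the spirit of the remote monad design pattern; Prim, send,
and run are illustrative names, not the paper's API): commands
accumulate locally, and a result-returning procedure flushes the whole
bundle in one packet.

    data Prim = Command String | Procedure String

    newtype Remote a = Remote ([Prim] -> IO ([Prim], a))

    instance Functor Remote where
      fmap f (Remote g) = Remote $ \q -> fmap (fmap f) (g q)

    instance Applicative Remote where
      pure a = Remote $ \q -> return (q, a)
      Remote f <*> Remote g = Remote $ \q -> do
        (q1, h) <- f q
        (q2, a) <- g q1
        return (q2, h a)

    instance Monad Remote where
      Remote g >>= k = Remote $ \q -> do
        (q1, a) <- g q
        let Remote h = k a
        h q1

    command :: String -> Remote ()        -- queued locally, no round trip
    command c = Remote $ \q -> return (q ++ [Command c], ())

    procedure :: String -> Remote String  -- forces one send of the bundle
    procedure p = Remote $ \q -> do
      r <- send (q ++ [Procedure p])
      return ([], r)

    send :: [Prim] -> IO String           -- stand-in transport
    send ps = do
      putStrLn ("one packet, " ++ show (length ps) ++ " primitives")
      return "reply"

    run :: Remote a -> IO a
    run (Remote g) = do
      (leftover, a) <- g []               -- flush trailing commands, if any
      if null leftover
        then return a
        else do _ <- send leftover; return a

Here run (command "a" >> command "b" >> procedure "c") sends a single
packet carrying all three primitives.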
@Article{Quick:2017:AMH,
author = "Donya Quick",
title = "Algorithmic music in {Haskell} (invited talk)",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "98--98",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3127334",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Functional programming is becoming increasingly
popular in artistic areas such as algorithmic music
composition. Euterpea and Kulitta are two libraries for
working with music in Haskell. Euterpea is a library
for representing and manipulating basic musical
structures, and is useful both in a pedagogical setting
to teach functional programming through the arts and as
a tool to create complex pieces of algorithmic music.
Kulitta is a framework for automated composition that
addresses music at a more abstract level than Euterpea,
capturing aspects of musical style through geometric
models and probabilistic grammars. Both of these
libraries leverage Haskell's pure functional nature and
strong type system to achieve versatile, yet concise
designs that allow the creation of diverse and
interesting music. Features from these libraries have
also been integral in the design of newer systems for
natural language processing and artificial intelligence
in the musical domain. This talk will explore
challenges presented by creating these kinds of
domain-specific embedded languages for working with
music, and how taking functional approaches to them
yields elegant solutions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Szamozvancev:2017:WTM,
author = "Dmitrij Szamozvancev and Michael B. Gale",
title = "Well-typed music does not sound wrong (experience
report)",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "99--104",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122964",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Music description and generation are popular use cases
for Haskell, ranging from live coding libraries to
automatic harmonisation systems. Some approaches use
probabilistic methods, others build on the theory of
Western music composition, but there has been little
work done on checking the correctness of musical pieces
in terms of voice leading, harmony, and structure.
Haskell's recent additions to the type system now
enable us to perform such analysis statically. We
present our experience of implementing a type-level
model of classical music and an accompanying EDSL which
enforce the rules of classical music at compile-time,
turning composition mistakes into compiler errors.
Along the way, we discuss the strengths and limitations
of doing this in Haskell and demonstrate that the type
system of the language is fully capable of expressing
non-trivial and practical logic specific to a
particular domain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Perez:2017:BFT,
author = "Ivan Perez",
title = "Back to the future: time travel in {FRP}",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "105--116",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122957",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Functional Reactive Programming (FRP) allows
interactive applications to be modelled in a
declarative manner using time-varying values. For
practical reasons, however, operational constraints are
often imposed, such as having a fixed time domain, time
always flowing forward, and limiting the exploration of
the past. In this paper we show how these constraints
can be overcome, giving local control over the time
domain, the direction of time and the sampling step. We
study the behaviour of FRP expressions when time flows
backwards, and demonstrate how to synchronize
subsystems running asynchronously and at different
sampling rates. We have verified the practicality of
our approach with two non-trivial games in which time
control is central to the gameplay.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
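The essence of local time control can be sketched with an
explicit-step signal function (our toy model, not the paper's
library): because the sampling step is a parameter, a combinator can
negate or rescale it for an embedded subsystem.

    newtype SF a b = SF { step :: Double -> a -> (b, SF a b) }

    -- Running integral of the input signal.
    integral :: SF Double Double
    integral = go 0
      where go acc = SF $ \dt x -> let acc' = acc + dt * x
                                   in (acc', go acc')

    -- Time flows backwards inside the given signal function.
    backwards :: SF a b -> SF a b
    backwards (SF f) = SF $ \dt x ->
      let (y, k) = f (negate dt) x in (y, backwards k)

    -- A local change of sampling rate: run the subsystem twice as fast.
    twiceAsFast :: SF a b -> SF a b
    twiceAsFast (SF f) = SF $ \dt x ->
      let (y, k) = f (2 * dt) x in (y, twiceAsFast k)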
@Article{Paykin:2017:LM,
author = "Jennifer Paykin and Steve Zdancewic",
title = "The Linearity Monad",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "117--132",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122965",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce a technique for programming with
domain-specific linear languages using the monad that
arises from the theory of linear/non-linear logic. In
this work we interpret the linear/non-linear model as a
simple, effectful linear language embedded inside an
existing non-linear host language. We implement a
modular framework for defining these linear EDSLs in
Haskell, allowing both shallow and deep embeddings. To
demonstrate the effectiveness of the framework and the
linearity monad, we implement languages for file
handles, mutable arrays, session types, and quantum
computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Karachalias:2017:EFD,
author = "Georgios Karachalias and Tom Schrijvers",
title = "Elaboration on functional dependencies: functional
dependencies are dead, long live functional
dependencies!",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "133--147",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122966",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Functional dependencies are a popular extension to
Haskell's type-class system because they provide
fine-grained control over type inference, resolve
ambiguities and even enable type-level computations.
Unfortunately, several aspects of Haskell's functional
dependencies are ill-understood. In particular, the GHC
compiler does not properly enforce the functional
dependency property, and rejects well-typed programs
because it does not know how to elaborate them into its
core language, System F$_C$. This paper presents a
novel formalization of functional dependencies that
addresses these issues: We explicitly capture the
functional dependency property in the type system, in
the form of explicit type equalities. We also provide a
type inference algorithm and an accompanying
elaboration strategy which allows all well-typed
programs to be elaborated into System F$_C$.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
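For readers unfamiliar with the feature, the classic collection
example (ours, not from the paper) shows the fine-grained inference
control being formalized: the dependency c -> e lets the compiler
resolve the element type from the collection type alone.

    {-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies,
                 FlexibleInstances #-}

    class Collects c e | c -> e where
      empty  :: c
      insert :: e -> c -> c

    instance Collects [a] a where
      empty  = []
      insert = (:)

    -- Without the dependency, e would be ambiguous here; with it,
    -- choosing c fixes e, so this definition type-checks.
    singleton :: Collects c e => e -> c
    singleton x = insert x empty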
@Article{Bottu:2017:QCC,
author = "Gert-Jan Bottu and Georgios Karachalias and Tom
Schrijvers and Bruno C. d. S. Oliveira and Philip
Wadler",
title = "Quantified class constraints",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "148--161",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122967",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Quantified class constraints were proposed many
years ago to raise the expressive power of type classes
from Horn clauses to the universal fragment of
Hereditary Harrop logic. Yet, while it has been much
asked for over the years, the feature was never
implemented or studied in depth. Instead, several
workarounds have been proposed, all of which are
ultimately stopgap measures. This paper revisits the
idea of quantified class constraints and elaborates it
into a practical language design. We show the merit of
quantified class constraints in terms of more
expressive modeling and in terms of terminating type
class resolution. In addition, we provide a declarative
specification of the type system as well as a type
inference algorithm that elaborates into System F.
Moreover, we discuss termination conditions of our
system and also provide a prototype implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
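A small example conveys the expressiveness gap being closed (our
sketch, written with the QuantifiedConstraints extension that GHC
later shipped; the paper predates that implementation, and the exact
extension set may vary by GHC version):

    {-# LANGUAGE QuantifiedConstraints, UndecidableInstances #-}

    data Rose f a = Branch a (f (Rose f a))

    -- For an abstract f, plain Horn-clause contexts cannot express
    -- "f preserves Eq at every element type"; the quantified
    -- constraint below can, so the instance becomes definable.
    instance (Eq a, forall b. Eq b => Eq (f b)) => Eq (Rose f a) where
      Branch x xs == Branch y ys = x == y && xs == ys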
@Article{Aronsson:2017:HSC,
author = "Markus Aronsson and Mary Sheeran",
title = "Hardware software co-design in {Haskell}",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "162--173",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122970",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a library in Haskell for programming Field
Programmable Gate Arrays (FPGAs), including hardware
software co-design. Code for software (in C) and
hardware (in VHDL) is generated from a single program,
along with the code to support communication between
hardware and software. We present type-based techniques
for the simultaneous implementation of more than one
embedded domain specific language (EDSL). We build upon
a generic representation of imperative programs that is
loosely coupled to instruction and expression types,
allowing the individual parts to be developed and
improved separately. Code generation is implemented as
a series of translations between progressively smaller,
typed EDSLs, safeguarding against errors that arise in
untyped translations. Initial case studies show
promising performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Clifton-Everest:2017:SIA,
author = "Robert Clifton-Everest and Trevor L. McDonell and
Manuel M. T. Chakravarty and Gabriele Keller",
title = "Streaming irregular arrays",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "174--185",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122971",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Previous work has demonstrated that it is possible to
generate efficient and highly parallel code for
multicore CPUs and GPUs from combinator-based array
languages for a range of applications. That work,
however, has been limited to operating on flat,
rectangular structures without any facilities for
irregularity or nesting. In this paper, we show that
even a limited form of nesting provides substantial
benefits both in terms of the expressiveness of the
language (increasing modularity and providing support
for simple irregular structures) and the portability of
the code (increasing portability across
resource-constrained devices, such as GPUs).
Specifically, we generalise Blelloch's flattening
transformation along two lines: (1) we explicitly
distinguish between definitely regular and potentially
irregular computations; and (2) we handle
multidimensional arrays. We demonstrate the utility of
this generalisation by an extension of the embedded
array language Accelerate to include irregular streams
of multidimensional arrays. We discuss code generation,
optimisation, and irregular stream scheduling as well
as a range of benchmarks on both multicore CPUs and
GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Yates:2017:ISP,
author = "Ryan Yates and Michael L. Scott",
title = "Improving {STM} performance with transactional
structs",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "186--196",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122972",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software transactional memory (STM) has made it
significantly easier to write correct concurrent
programs in Haskell. Its performance, however, is
limited by several inefficiencies. While safe
concurrent computations are easy to express in
Haskell's STM, concurrent data structures suffer
unfortunate bloat in the implementation due to an extra
level of indirection for mutable references as well as
the inability to express unboxed mutable transactional
values. We address these deficiencies by introducing
TStruct to the GHC run-time
system, allowing strict unboxed transactional values as
well as mutable references without an extra
indirection. Using TStruct we implement several data
structures, discuss their design, and provide benchmark
results on a large multicore machine. Our benchmarks
show that concurrent data structures built with TStruct
out-scale and out-perform their TVar-based equivalents.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
@Article{Chen:2017:ALF,
author = "Chao-Hong Chen and Vikraman Choudhury and Ryan R.
Newton",
title = "Adaptive lock-free data structures in {Haskell}: a
general method for concurrent implementation swapping",
journal = j-SIGPLAN,
volume = "52",
number = "10",
pages = "197--211",
month = oct,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3156695.3122973",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A key part of implementing high-level languages is
providing built-in and default data structures. Yet
selecting good defaults is hard. A mutable data
structure's workload is not known in advance, and it
may shift over its lifetime --- e.g., between
read-heavy and write-heavy, or from heavy contention by
multiple threads to single-threaded or low-frequency
use. One idea is to switch implementations adaptively,
but it is nontrivial to switch the implementation of a
concurrent data structure at runtime. Performing the
transition requires a concurrent snapshot of data
structure contents, which normally demands special
engineering in the data structure's design. However, in
this paper we identify and formalize a relevant
property of lock-free algorithms. Namely, lock-freedom
is sufficient to guarantee that freezing memory
locations in an arbitrary order will result in a valid
snapshot. Several functional languages have data
structures that freeze and thaw, transitioning between
mutable and immutable, such as Haskell vectors and
Clojure transients, but these enable only
single-threaded writers. We generalize this approach to
augment an arbitrary lock-free data structure with the
ability to gradually freeze and optionally transition
to a new representation. This augmentation doesn't
require changing the algorithm or code for the data
structure, only replacing its datatype for mutable
references with a freezable variant. In this paper, we
present an algorithm for lifting plain to adaptive data
and prove that the resulting hybrid data structure is
itself lock-free, linearizable, and simulates the
original. We also perform an empirical case study in
the context of heating up and cooling down concurrent
maps.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '17 conference proceedings.",
}
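The freezable-reference idea at the heart of the approach above can be
sketched directly (a toy version of ours, not the paper's
implementation, which avoids the extra indirection): once a location
is frozen, no later write succeeds, which is why freezing locations in
an arbitrary order still yields a valid snapshot of a lock-free
structure.

    import Data.IORef

    data Cell a = Val a | Frozen a

    newtype FRef a = FRef (IORef (Cell a))

    newFRef :: a -> IO (FRef a)
    newFRef x = FRef <$> newIORef (Val x)

    -- Returns False once the location has been frozen.
    writeFRef :: FRef a -> a -> IO Bool
    writeFRef (FRef r) x = atomicModifyIORef' r $ \c -> case c of
      Val _    -> (Val x, True)
      Frozen y -> (Frozen y, False)

    -- Idempotent; captures the value present at freeze time.
    freeze :: FRef a -> IO a
    freeze (FRef r) = atomicModifyIORef' r $ \c -> case c of
      Val x    -> (Frozen x, x)
      Frozen x -> (Frozen x, x)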
@Article{Pizlo:2017:JVM,
author = "Filip Pizlo",
title = "The {JavaScriptCore} virtual machine (invited talk)",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "1--1",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3148567",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "JavaScriptCore (JSC) is an open-source
high-performance implementation of JavaScript. JSC is
used in the WebKit open source browser engine as well
as a system framework on macOS and iOS. This talk will
give a broad high-level overview of JSC's
performance-oriented architecture, including specific
details about the object model, garbage collector,
optimizing compilers, type inference, and
deoptimization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Siek:2017:CPT,
author = "Jeremy Siek",
title = "Challenges and progress toward efficient gradual
typing (invited talk)",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "2--2",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3148570",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mixing static and dynamic type checking in the same
language is catching on, with the TypeScript and Flow
variants of JavaScript, the MyPy and Reticulated
variants of Python, the Strongtalk and Gradualtalk
variants of Smalltalk, as well as Typed Racket, Typed
Clojure, and Perl 6. The gradual typing approach to
such mixing seeks to protect the statically typed code
from the dynamically typed code, allowing compilers to
leverage type information when optimizing the static
code. Unfortunately, ensuring soundness requires
runtime checking at the boundaries of typed and untyped
code, and the cost of this checking can drown out the
performance benefits of optimization. For example, in
Typed Racket, some partially typed programs are 1000X
slower than the untyped or fully typed version of the
same program. But all is not lost! In this talk I
present the results of ongoing research to tame the
runtime overheads of gradual typing in the context of a
prototype compiler, named Grift, that we are developing
at Indiana University.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Marr:2017:CAP,
author = "Stefan Marr and Carmen Torres Lopez and Dominik Aumayr
and Elisa Gonzalez Boix and Hanspeter
M{\"o}ssenb{\"o}ck",
title = "A concurrency-agnostic protocol for multi-paradigm
concurrent debugging tools",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "3--14",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3133842",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Today's complex software systems combine high-level
concurrency models. Each model is used to solve a
specific set of problems. Unfortunately, debuggers
support only the low-level notions of threads and
shared memory, forcing developers to reason about these
notions instead of the high-level concurrency models
they chose. This paper proposes a concurrency-agnostic
debugger protocol that decouples the debugger from the
concurrency models employed by the target application.
As a result, the underlying language runtime can define
custom breakpoints, stepping operations, and execution
events for each concurrency model it supports, and a
debugger can expose them without having to be
specifically adapted. We evaluated the generality of
the protocol by applying it to SOMns, a Newspeak
implementation, which supports a diversity of
concurrency models including communicating sequential
processes, communicating event loops, threads and
locks, fork/join parallelism, and software
transactional memory. We implemented 21 breakpoints and
20 stepping operations for these concurrency models.
For none of these, the debugger needed to be changed.
Furthermore, we visualize all concurrent interactions
independently of a specific concurrency model. To show
that tooling for a specific concurrency model is
possible, we visualize actor turns and message sends
separately.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Ungar:2017:DAO,
author = "David Ungar and David Grove and Hubertus Franke",
title = "Dynamic atomicity: optimizing swift memory
management",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "15--26",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3133843",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Swift is a modern multi-paradigm programming language
with an extensive developer community and open source
ecosystem. Swift 3's memory management strategy is
based on Automatic Reference Counting (ARC) augmented
with unsafe APIs for manually-managed memory. We have
seen ARC consume as much as 80\% of program execution
time. A significant portion of ARC's direct performance
cost can be attributed to its use of atomic machine
instructions to protect reference count updates from
data races. Consequently, we have designed and
implemented dynamic atomicity, an optimization which
safely replaces atomic reference-counting operations
with nonatomic ones where feasible. The optimization
introduces a store barrier to detect possibly
intra-thread references, compiler-generated recursive
reference-tracers to find all affected objects, and a
bit of state in each reference count to encode its
atomicity requirements. Using a suite of 171
microbenchmarks, 9 programs from the Computer Language
Benchmarks Game, and the Richards benchmark, we
performed a limit study by unsafely making all
reference counting operations nonatomic. We measured
potential speedups of up to 220\% on the
microbenchmarks, 120\% on the Benchmarks Game and 70\%
on Richards. By automatically reducing ARC overhead,
our optimization both improves Swift 3's performance
and reduces the temptation for performance-oriented
programmers to resort to unsafe manual memory
management. Furthermore, the machinery implemented for
dynamic atomicity could also be employed to obtain
cheaper thread-safe Swift data structures, or to
augment ARC with optional cycle detection or a backup
tracing garbage collector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Infante:2017:OER,
author = "Alejandro Infante and Alexandre Bergel",
title = "Object equivalence: revisiting object equality
profiling (an experience report)",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "27--38",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3133844",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern object-oriented programming languages greatly
alleviate the memory management for programmers.
Despite the efficiency of garbage collection and
Just-In-Time program analyses, memory still remains
prone to being wasted. A bloated memory may have severe
consequences, including frequent execution lags due to
a high pressure on the garbage collector and suboptimal
object dependencies. We found that dynamically
monitoring object production sites and the equivalence
of the produced objects is key to identify wasted
memory consumption caused by redundant objects. We
implemented optimizations for reducing the memory
consumption of six applications, achieving a reduction of
over 40\% in half of the applications without having
any prior knowledge of these applications. Our results
partially replicate the results obtained by Marinov and
O'Callahan and explore new ways to identify redundant
objects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Pimas:2017:GCE,
author = "Javier Pim{\'a}s and Javier Burroni and Jean Baptiste
Arnaud and Stefan Marr",
title = "Garbage collection and efficiency in dynamic
metacircular runtimes: an experience report",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "39--50",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3133845",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "In dynamic object-oriented languages, low-level
mechanisms such as just-in-time compilation, object
allocation, garbage collection (GC) and method dispatch
are often handled by virtual machines (VMs). VMs are
typically implemented using static languages, allowing
only a few changes at run time. In such systems, the VM
is not part of the language and interfaces to memory
management or method dispatch are fixed, not allowing
for arbitrary adaptation. Furthermore, the
implementation typically cannot be inspected or
debugged with standard tools used to work on
application code. This paper reports on our experience
building Bee, a dynamic Smalltalk runtime, written in
Smalltalk. Bee is a Dynamic Metacircular Runtime (DMR)
and seamlessly integrates the VM into the application
and thereby overcomes many restrictions of classic VMs,
for instance by allowing arbitrary code modifications
of the VM at run time. Furthermore, the approach
enables developers to use their standard tools for
application code also for the VM, allowing them to
inspect, debug, understand, and modify a DMR
seamlessly. We detail our experience of implementing
GC, compilation, and optimizations in a DMR. We discuss
examples where we found that DMRs can improve
understanding of the system, provide tighter control of
the software stack, and facilitate research. We also
show that the Bee DMR matches and surpasses the
performance of a widely used Smalltalk VM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Loring:2017:SAJ,
author = "Matthew C. Loring and Mark Marron and Daan Leijen",
title = "Semantics of asynchronous {JavaScript}",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "51--62",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3133846",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "JavaScript code running in the Node.js runtime is a
major platform for developers building cloud, mobile,
or IoT applications. A fundamental concept in Node.js
programming is the use of asynchronous callbacks and
event loops to provide highly responsive applications.
While conceptually simple, this programming model
contains numerous subtleties and behaviors that are
defined implicitly by the current Node.js
implementation. This paper presents the first
comprehensive formalization of the Node.js asynchronous
execution model and defines a high-level notion of
async-contexts to formalize fundamental relationships
between asynchronous executions in an application.
These formalizations provide a foundation for the
construction of static or dynamic program analysis
tools, support the exploration of alternative Node.js
event loop implementations, and provide a high-level
conceptual framework for reasoning about relationships
between the execution of asynchronous callbacks in a
Node.js application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Vergu:2017:SNR,
author = "Vlad Vergu and Michiel Haisma and Eelco Visser",
title = "The semantics of name resolution in {Grace}",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "63--74",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3133847",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Grace is a dynamic object-oriented programming
language designed to aid programming education. We
present a formal model of and give an operational
semantics for its object model and name resolution
algorithm. Our main contributions are a systematic
model of Grace's name resolution using scope graphs,
relating linguistic features to other languages, and an
operationalization of this model in the form of an
operational semantics which is readable and executable.
The semantics are extensively tested against a
reference Grace implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Soldevila:2017:DLF,
author = "Mallku Soldevila and Beta Ziliani and Bruno Silvestre
and Daniel Fridlender and Fabio Mascarenhas",
title = "Decoding {Lua}: formal semantics for the developer and
the semanticist",
journal = j-SIGPLAN,
volume = "52",
number = "11",
pages = "75--86",
month = nov,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170472.3133848",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:13 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We provide formal semantics for a large subset of the
Lua programming language, in its version 5.2. We
validate our model by mechanizing it and testing it
against the test suite of the reference interpreter of
Lua, obtaining evidence that our model accurately
represents the language. We target both a PL
semanticist --- not necessarily versed in Lua --- and
a Lua developer --- not necessarily versed in semantic
frameworks. To the former, we present the peculiarities
of the language, and how we model them in a modular
small-step operational semantics, using concepts from
Felleisen-Hieb's reduction semantics with evaluation
contexts. Moreover, we mechanize and test the model in
PLT Redex, the de facto tool for reduction semantics.
To the reader unfamiliar with such concepts, we provide
a gentle introduction to the model. It is our hope that
developers of the different Lua implementations and
dialects understand the model and consider it both for
testing their work and for experimenting with new
language features.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "DLS '17 conference proceedings.",
}
@Article{Dig:2017:LRR,
author = "Danny Dig",
title = "The landscape of refactoring research in the last
decade (keynote)",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "1--1",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3148040",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In the last decade refactoring research has seen
exponential growth. I will attempt to map this vast
landscape and the advances that the community has made
by answering questions such as who does what, when,
where, with whom, why, and how. I will muse on some of
the factors contributing to the growth of the field,
the adoption of research into industry, and the lessons
that we learned along this journey. This will inspire
and equip you so that you can make a difference, with
people who make a difference, at a time when it makes a
difference.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Ge:2017:RSM,
author = "Rui Ge and Ronald Garcia",
title = "Refining semantics for multi-stage programming",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "2--14",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136047",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The multi-stage programming paradigm supports runtime
code generation and execution. Though powerful, its
potential is impeded by the lack of static analysis
support. Van Horn and Might proposed a general-purpose
approach to systematically develop static analyses by
transforming an environmental abstract machine, which
evolves a control string, an environment and a
continuation as a program evaluates. To the best of our
knowledge, no such semantics exists for a multi-stage
language like MetaML. We develop and prove correct an
environmental abstract machine semantics for MetaML by
gradually refining the reference substitutional
structural operational semantics. Highlights of our
approach include leveraging explicit substitutions to
bridge the gap between substitutional and environmental
semantics, and devising meta-environments to model the
complexities of variable bindings in multi-stage
environmental semantics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Ofenbeck:2017:SGP,
author = "Georg Ofenbeck and Tiark Rompf and Markus
P{\"u}schel",
title = "Staging for generic programming in space and time",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "15--28",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136060",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Metaprogramming is among the most promising candidates
to solve, through specialization, the abstraction vs.
performance trade-off that plagues software engineering.
Metaprogramming has been used to enable low-overhead
generic programming for a long time, with C++ templates
being one of the most prominent examples. But often a
single, fixed pattern of specialization is not enough,
and more flexibility is needed. Hence, this paper seeks
to apply generic programming techniques to challenges
in metaprogramming, in particular to abstract over the
execution stage of individual program expressions. We
thus extend the scope of generic programming into the
dimension of time. The resulting notion of stage
polymorphism enables novel abstractions in the design
of program generators, which we develop and explore in
this paper. We present one possible implementation, in
Scala using the lightweight modular staging (LMS)
framework, and apply it to two important case studies:
convolution on images and the fast Fourier transform
(FFT).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Oishi:2017:SCT,
author = "Junpei Oishi and Yukiyoshi Kameyama",
title = "Staging with control: type-safe multi-stage
programming with control operators",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "29--40",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136049",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Staging allows a programmer to write domain-specific,
custom code generators. Ideally, a programming language
for staging provides all necessary features for
staging, and at the same time, gives static guarantees
for the safety properties of generated code, including
well-typedness and well-scopedness. We address this
classic problem for the language with control
operators, which allow code optimizations in a modular
and compact way. Specifically, we design a staged
programming language with the expressive control
operators shift0 and reset0, which let us express, for
instance, multi-layer let-insertion, while keeping the
static guarantee of well-typedness and well-scopedness.
For this purpose, we extend our earlier work on refined
environment classifiers which were introduced for the
staging language with state. We show that our language
is expressive enough to express interesting code
generation techniques, and that the type system enjoys
type soundness. We also mention a type inference
algorithm for our language under a reasonable
restriction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Courtes:2017:CSG,
author = "Ludovic Court{\`e}s",
title = "Code staging in {GNU Guix}",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "41--48",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136045",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/gnu.bib;
https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "GNU Guix is a ``functional'' package manager that
borrows from earlier work on Nix by Dolstra et al.
Guix implements high-level abstractions such as
packages and operating system services as
domain-specific languages (DSL) embedded in Scheme, and
it also implements build actions and operating system
orchestration in Scheme. This leads to a multi-tier
programming environment where embedded code snippets
are staged for eventual execution. In this paper we
present G-expressions or ``gexps''. We explain our
journey from traditional Lisp S-expressions to
G-expressions, which augment the former with contextual
information, and we discuss the implementation of
gexps. We report on our experience using gexps in a
variety of operating system use cases --- from package
build processes to system services. Gexps provide a
novel way to cover many aspects of OS configuration in
a single, multi-tier language while facilitating code
reuse and code sharing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Linsbauer:2017:CVC,
author = "Lukas Linsbauer and Thorsten Berger and Paul
Gr{\"u}nbacher",
title = "A classification of variation control systems",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "49--62",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136054",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Version control systems are an integral part of
today's software and systems development processes.
They facilitate the management of revisions (sequential
versions) and variants (concurrent versions) of a
system under development and enable collaboration
between developers. Revisions are commonly maintained
either per file or for the whole system. Variants are
supported via branching or forking mechanisms that
conceptually clone the whole system under development.
It is known that such cloning practices come with
disadvantages. In fact, while short-lived branches for
isolated development of new functionality (a.k.a.
feature branches) are well supported, dealing with
long-term and fine-grained system variants currently
requires employing additional mechanisms, such as
preprocessors, build systems or custom configuration
tools. Interestingly, the literature describes a number
of variation control systems, which provide a richer
set of capabilities for handling fine-grained system
variants compared to the version control systems widely
used today. In this paper we present a classification
and comparison of selected variation control systems to
get an understanding of their capabilities and the
advantages they can offer. We discuss problems of
variation control systems, which may explain their
comparatively low popularity. We also propose research
activities we regard as important to change this
situation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Lapena:2017:AIN,
author = "Ra{\'u}l Lape{\~n}a and Jaime Font and {\'O}scar
Pastor and Carlos Cetina",
title = "Analyzing the impact of natural language processing
over feature location in models",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "63--76",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136052",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Feature Location (FL) is a common task in the Software
Engineering field, especially in maintenance and
evolution of software products. The results of FL
depend to a great extent on the style in which Feature
Descriptions and software artifacts are written.
Therefore, Natural Language Processing (NLP) techniques
are used to process them. In this paper, we
analyze the influence of the most common NLP techniques
over FL in Conceptual Models through Latent Semantic
Indexing, and the influence of human participation when
embedding domain knowledge in the process. We evaluated
the techniques in a real-world industrial case study in
the rolling stocks domain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Fenske:2017:HPA,
author = "Wolfram Fenske and Sandro Schulze and Gunter Saake",
title = "How preprocessor annotations (do not) affect
maintainability: a case study on change-proneness",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "77--90",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136059",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Preprocessor annotations (e.g., \#ifdef in C) enable
the development of similar, but distinct software
variants from a common code base. One particularly
popular preprocessor is the C preprocessor, cpp. But
the cpp is also widely criticized for impeding software
maintenance by making code hard to understand and
change. Yet, evidence to support this criticism is
scarce. In this paper, we investigate the relation
between cpp usage and maintenance effort, which we
approximate with the frequency and extent of source
code changes. To this end, we mined the version control
repositories of eight open-source systems written in C.
For each system, we measured if and how individual
functions use cpp annotations and how they were
changed. We found that functions containing cpp
annotations are generally changed more frequently and
more profoundly than other functions. However, when
accounting for function size, the differences disappear
or are greatly diminished. In summary, with respect to
the frequency and extent of changes, our findings do
not support the criticism of the cpp regarding
maintainability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Carlson:2017:TQC,
author = "Travis Carlson and Eric {Van Wyk}",
title = "Type qualifiers as composable language extensions",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "91--103",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136055",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper reformulates type qualifiers as language
extensions that can be automatically and reliably
composed. Type qualifiers annotate type expressions to
introduce new subtyping relations and are powerful
enough to detect many kinds of errors. Type qualifiers,
as illustrated in our ableC extensible language
framework for C, can introduce rich forms of concrete
syntax, can generate dynamic checks on data when static
checks are infeasible or not appropriate, and can inject
code that affects the program's behavior, for example
for conversions of data or logging. ableC language
extensions to C are implemented as attribute grammar
fragments and provide an expressive mechanism for type
qualifier implementations to check for additional
errors, e.g. dereferences to pointers not qualified by
a ``nonnull'' qualifier, and report custom error
messages. Our approach distinguishes language extension
users from developers and provides modular analyses to
developers to ensure that when users select a set of
extensions to use, they will automatically compose to
form a working compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Rosa:2017:ARC,
author = "Andrea Ros{\`a} and Eduardo Rosales and Walter
Binder",
title = "Accurate reification of complete supertype information
for dynamic analysis on the {JVM}",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "104--116",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136061",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Reflective supertype information (RSI) is useful for
many instrumentation-based dynamic analyses on the Java
Virtual Machine (JVM). On the one hand, while such
information can be obtained when performing the
instrumentation within the same JVM process executing
the instrumented program, in-process instrumentation
severely limits the code coverage of the analysis. On
the other hand, performing the instrumentation in a
separate process can achieve full code coverage, but
complete RSI is generally not available, often
requiring expensive runtime checks in the instrumented
program. Providing accurate and complete RSI in the
instrumentation process is challenging because of
dynamic class loading and classloader namespaces. In
this paper, we present a novel technique to accurately
reify complete RSI in a separate instrumentation
process. We implement our technique in the dynamic
analysis framework DiSL and evaluate it on a task
profiler, achieving speedups of up to 45\% for an
analysis with full code coverage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Pearce:2017:RSC,
author = "David J. Pearce",
title = "Rewriting for sound and complete union, intersection
and negation types",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "117--130",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136042",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Implementing the type system of a programming language
is a critical task that is often done in an ad-hoc
fashion. Whilst this makes it hard to ensure the system
is sound, it also makes it difficult to extend as the
language evolves. We are interested in describing type
systems using declarative rewrite rules from which an
implementation can be automatically generated. Whilst
not all type systems are easily expressed in this
manner, those involving unions, intersections and
negations are well-suited for this. In this paper, we
consider a relatively complex type system involving
unions, intersections and negations developed
previously. This system was not developed with
rewriting in mind, though clear parallels are
immediately apparent from the original presentation.
For example, the system presented required types be
first converted into a variation on Disjunctive Normal
Form. We identify that the original system can, for the
most part, be reworked to enable a natural expression
using declarative rewrite rules. We present an
implementation of our rewrite rules in the Whiley
Rewrite Language (WyRL), and report performance results
compared with a hand-coded solution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Parreaux:2017:QSR,
author = "Lionel Parreaux and Amir Shaikhha and Christoph E.
Koch",
title = "Quoted staged rewriting: a practical approach to
library-defined optimizations",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "131--145",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136043",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
abstract = "Staging has proved a successful technique for
programmatically removing code abstractions, thereby
allowing for faster program execution while retaining a
high-level interface for the programmer. Unfortunately,
techniques based on staging suffer from a number of
problems --- ranging from practicalities to fundamental
limitations --- which have prevented their widespread
adoption. We introduce Quoted Staged Rewriting (QSR),
an approach that uses type-safe, pattern
matching-enabled quasiquotes to define optimizations.
The approach is ``staged'' in two ways: first, rewrite
rules can execute arbitrary code during pattern
matching and code reconstruction, leveraging the power
and flexibility of staging; second, library designers
can orchestrate the application of successive rewriting
phases (stages). The advantages of using
quasiquote-based rewriting are that library designers
never have to deal directly with the intermediate
representation (IR), and that it allows for
non-intrusive optimizations --- in contrast with
staging, it is not necessary to adapt the entire
library and user programs to accommodate optimizations.
We show how Squid, a Scala macro-based framework,
enables QSR and renders library-defined optimizations
more practical than ever before: library designers
write domain-specific optimizers that users invoke
transparently on delimited portions of their code base.
As a motivating example we describe an implementation
of stream fusion (a well-known deforestation technique)
that is both simpler and more powerful than the state
of the art, and can readily be used by Scala
programmers with no knowledge of metaprogramming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Caldwell:2017:RCC,
author = "Joseph Caldwell and Shigeru Chiba",
title = "Reducing calling convention overhead in
object-oriented programming on embedded {ARM Thumb-2}
platforms",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "146--156",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136057",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper examines the causes and extent of code size
overhead caused by the ARM calling convention in
Thumb-2 binaries. We show that binaries generated from
C++ source files generally have higher amounts of
calling convention overhead, and present a binary file
optimizer to eliminate some of that overhead. Calling
convention overhead can negatively impact power
consumption, flash memory costs, and chip size in
embedded or otherwise resource-constrained domains.
This is particularly true on platforms using
``compressed'' instruction sets, such as the 16-bit ARM
Thumb and Thumb-2 instruction sets, used in virtually
all smartphones and in many other smaller-scale
embedded devices. In this paper, we examine the extent
of calling convention overhead in practical software,
compare the results for C and C++ programs, and find
that C++ programs generally have a higher percentage of
calling-convention overhead. Finally, we demonstrate a
tool capable of eliminating some of this overhead,
particularly in the case of C++ programs, by modifying
the calling conventions on a per-procedure basis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Perard-Gayot:2017:RSE,
author = "Ars{\`e}ne P{\'e}rard-Gayot and Martin Weier and
Richard Membarth and Philipp Slusallek and Roland
Lei{\ss}a and Sebastian Hack",
title = "{RaTrace}: simple and efficient abstractions for {BVH}
ray traversal algorithms",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "157--168",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136044",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In order to achieve the highest possible performance,
the ray traversal and intersection routines at the core
of every high-performance ray tracer are usually
hand-coded, heavily optimized, and implemented
separately for each hardware platform --- even though they
share most of their algorithmic core. The results are
implementations that heavily mix algorithmic aspects
with hardware and implementation details, making the
code non-portable and difficult to change and maintain.
In this paper, we present a new approach that offers
the ability to define in a functional language a set of
conceptual, high-level language abstractions that are
optimized away by a special compiler in order to
maximize performance. Using this abstraction mechanism
we separate a generic ray traversal and intersection
algorithm from its low-level aspects that are specific
to the target hardware. We demonstrate that our code is
not only significantly more flexible, simpler to write,
and more concise but also that the compiled results
perform as well as state-of-the-art implementations on
any of the tested CPU and GPU platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Susungi:2017:TCG,
author = "Adilla Susungi and Norman A. Rink and Jer{\'o}nimo
Castrill{\'o}n and Immo Huismann and Albert Cohen and
Claude Tadonki and J{\"o}rg Stiller and Jochen
Fr{\"o}hlich",
title = "Towards compositional and generative tensor
optimizations",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "169--175",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136050",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many numerical algorithms are naturally expressed as
operations on tensors (i.e. multi-dimensional arrays).
Hence, tensor expressions occur in a wide range of
application domains, e.g. quantum chemistry and
physics; big data analysis and machine learning; and
computational fluid dynamics. Each domain, typically,
has developed its own strategies for efficiently
generating optimized code, supported by tools such as
domain-specific languages, compilers, and libraries.
However, strategies and tools are rarely portable
between domains, and generic solutions typically act as
``black boxes'' that offer little control over code
generation and optimization. As a consequence, there
are application domains without adequate support for
easily generating optimized code, e.g. computational
fluid dynamics. In this paper we propose a generic and
easily extensible intermediate language for expressing
tensor computations and code transformations in a
modular and generative fashion. Beyond being an
intermediate language, our solution also offers
meta-programming capabilities for experts in code
optimization. While applications from the domain of
computational fluid dynamics serve to illustrate our
proposed solution, we believe that our general approach
can help unify research in tensor optimizations and
make solutions more portable between domains.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Al-Kofahi:2017:FLL,
author = "Jafar M. Al-Kofahi and Suresh Kothari and Christian
K{\"a}stner",
title = "Four languages and lots of macros: analyzing autotools
build systems",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "176--186",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136051",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Build systems are crucial for software system
development; however, there is a lack of tool support to
help with their high maintenance overhead. GNU
Autotools are widely used in the open source community,
but users face various challenges from their
hard-to-comprehend nature and staging of multiple code
generation steps, often leading to low-quality and
error-prone build code. In this paper, we present a
platform, AutoHaven, to provide a foundation for
developers to create analysis tools to help them
understand, maintain, and migrate their GNU Autotools
build systems. Internally it uses approximate parsing
and symbolic analysis of the build logic. We illustrate
the use of the platform with two tools: ACSense helps
developers to better understand their build systems and
ACSniff detects build smells to improve build code
quality. Our evaluation shows that AutoHaven can
support most GNU Autotools build systems and can detect
build smells in the wild.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Fernandes:2017:AUM,
author = "Leonardo Fernandes and M{\'a}rcio Ribeiro and Luiz
Carvalho and Rohit Gheyi and Melina Mongiovi and
Andr{\'e} Santos and Ana Cavalcanti and Fabiano Ferrari
and Jos{\'e} Carlos Maldonado",
title = "Avoiding useless mutants",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "187--198",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136053",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mutation testing is a program-transformation technique
that injects artificial bugs to check whether the
existing test suite can detect them. However, the costs
of using mutation testing are usually high, hindering
its use in industry. Useless mutants (equivalent and
duplicated) contribute to increasing costs. Previous
research has focused mainly on detecting useless
mutants only after they are generated and compiled. In
this paper, we introduce a strategy to help developers
with deriving rules to avoid the generation of useless
mutants. To use our strategy, we pass as input a set of
programs. For each program, we also need a passing test
suite and a set of mutants. As output, our strategy
yields a set of useless mutants candidates. After
manually confirming that the mutants classified by our
strategy as ``useless'' are indeed useless, we derive
rules that can avoid their generation and thus decrease
costs. To the best of our knowledge, we introduce 37
new rules that can avoid useless mutants right before
their generation. We then implement a subset of these
rules in the MUJAVA mutation testing tool. Since our
rules have been derived based on artificial and small
Java programs, we take our MUJAVA version embedded with
our rules and execute it in industrial-scale projects.
Our rules reduced the number of mutants by almost 13\%
on average. Our results are promising because (i) we
avoid useless mutants generation; (ii) our strategy can
help with identifying more rules if we set it to
use more complex Java programs; and (iii) our MUJAVA
version has only a subset of the rules we derived.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Nakamaru:2017:SFA,
author = "Tomoki Nakamaru and Kazuhiro Ichikawa and Tetsuro
Yamazaki and Shigeru Chiba",
title = "{Silverchain}: a fluent {API} generator",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "199--211",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136041",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a tool named Silverchain, which
generates class definitions for a fluent API from the
grammar of the API. A fluent API is an API that is used
by method chaining and its grammar is a BNF-like set of
rules that defines method chains accepted in type
checking. Fluent APIs generated by Silverchain provide
two styles of APIs: One is for building a chain by
concatenating all method calls in series. The other is
for building a chain from partial chains by passing
child chains to method calls in the parent chain as
their arguments. To generate such a fluent API,
Silverchain first translates the given grammar into a set
of deterministic pushdown automata without
$\epsilon$-transitions, then encodes these automata into
class definitions. Each constructed automaton
corresponds to a nonterminal in the given grammar and
recognizes symbol sequences produced from its
corresponding nonterminal.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Zaytsev:2017:PGE,
author = "Vadim Zaytsev",
title = "Parser generation by example for legacy pattern
languages",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "212--218",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136058",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Most modern software languages enjoy relatively free
and relaxed concrete syntax, with significant
flexibility of formatting of the program/model/sheet
text. Yet, in the dark legacy corners of software
engineering there are still languages with a strict
fixed column-based structure --- the compromises of
times long gone, attempting to combine some human
readability with some ease of machine processing. In
this paper, we consider an industrial case study for
retirement of a legacy domain-specific language,
completed under extreme circumstances: absolute lack of
documentation, varying line structure, hierarchical
blocks within one file, scalability demands for
millions of lines of code, performance demands for
manipulating tens of thousands of multi-megabyte files,
etc. However, the regularity of the language allowed us
to infer its structure from the available examples
automatically, and to produce highly efficient parsers
for it.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Mainland:2017:HCS,
author = "Geoffrey Mainland and Jeremy Johnson",
title = "A {Haskell} compiler for signal transforms",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "219--232",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136056",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Building a reusable, auto-tuning code generator from
scratch is a challenging problem, requiring many
careful design choices. We describe HSpiral, a Haskell
compiler for signal transforms that builds on the
foundational work of Spiral. Our design leverages many
Haskell language features to ensure that our framework
is reusable, flexible, and efficient. As well as
describing the design of our system, we show how to
extend it to support new classes of transforms,
including the number-theoretic transform and a variant
of the split-radix algorithm that results in reduced
operation counts. We also show how to incorporate
rewrite rules into our system to reproduce results from
previous literature on code generation for the fast
Fourier transform. Although the Spiral project
demonstrated significant advances in automatic code
generation, it has not been widely used by other
researchers. HSpiral is freely available under an
MIT-style license, and we are actively working to turn
it into a tool both to further our own research goals
and to serve as a foundation for other research groups'
work in developing new implementations of signal
transform algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Martini:2017:AGV,
author = "Ricardo Giuliani Martini and Pedro Rangel Henriques",
title = "Automatic generation of virtual learning spaces driven
by {CaVa DSL}: an experience report",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "233--245",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136046",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Several applications are based on Domain-Specific
Languages (DSL). They provide the right terminology for
a particular problem/subject, because they use a
particular domain vocabulary that defines abstract
concepts, different from general-purpose languages.
Aiming at the easy generation of virtual Learning
Spaces (LS) for those responsible for institutional
archives or museums, we have conceived and developed an
external domain-specific language, called CaVa DSL, to
describe, at an abstract level, virtual exhibition
rooms from the museum curator's viewpoint, giving the
curator the possibility to specify the virtual LS upon
a domain ontology vocabulary. We also contribute a set
of processors that process CaVa DSL descriptions and
generate virtual Learning Spaces, making the navigation
over important, real information contained in archival
documents available to the public through virtual
museums. To demonstrate the obtained results, we
present a running example throughout the paper showing
the virtual LS generation process.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Grebe:2017:RSD,
author = "Mark Grebe and David Young and Andy Gill",
title = "Rewriting a shallow {DSL} using a {GHC} compiler
extension",
journal = j-SIGPLAN,
volume = "52",
number = "12",
pages = "246--258",
month = dec,
year = "2017",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3170492.3136048",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Dec 1 18:56:14 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Embedded Domain Specific Languages are a powerful tool
for developing customized languages to fit specific
problem domains. Shallow EDSLs allow a programmer to
program using many of the features of a host language
and its syntax, but sacrifice performance. Deep EDSLs
provide better performance and flexibility, through the
ability to manipulate the abstract syntax tree of the
DSL program, but sacrifice syntactical similarity to
the host language. Using Haskino, an EDSL designed for
small embedded systems based on the Arduino line of
microcontrollers, and a compiler plugin for the Haskell
GHC compiler, we show a method for combining the best
aspects of shallow and deep EDSLs. The programmer is
able to write in the shallow EDSL, and have it
automatically transformed into the deep EDSL. This
allows the EDSL user to benefit from powerful aspects
of the host language, Haskell, while meeting the
demanding resource constraints of the small embedded
processing environment.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "GPCE '17 conference proceedings.",
}
@Article{Wen:2018:IBM,
author = "Haosen Wen and Joseph Izraelevitz and Wentao Cai and
H. Alan Beadle and Michael L. Scott",
title = "Interval-based memory reclamation",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "1--13",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178488",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper we present interval-based reclamation
(IBR), a new approach to safe reclamation of
disconnected memory blocks in nonblocking concurrent
data structures. Safe reclamation is a difficult
problem: a thread, before freeing a block, must ensure
that no other threads are accessing that block; the
required synchronization tends to be expensive. In
contrast with epoch-based reclamation, in which threads
reserve all blocks created after a certain time, or
pointer-based reclamation (e.g., hazard pointers), in
which threads reserve individual blocks, IBR allows a
thread to reserve all blocks known to have existed in a
bounded interval of time. By comparing a thread's
reserved interval with the lifetime of a detached but
not yet reclaimed block, the system can determine if
the block is safe to free. Like hazard pointers, IBR
avoids the possibility that a single stalled thread may
reserve an unbounded number of blocks; unlike hazard
pointers, it avoids a memory fence on most
pointer-following operations. It also avoids the need
to explicitly ``unreserve'' a no-longer-needed pointer.
We describe three specific IBR schemes (one with
several variants) that trade off performance,
applicability, and space requirements. IBR requires no
special hardware or OS support. In experiments with
data structure microbenchmarks, it also compares
favorably (in both time and space) to other
state-of-the-art approaches, making it an attractive
alternative for libraries of concurrent data
structures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Arbel-Raviv:2018:HEB,
author = "Maya Arbel-Raviv and Trevor Brown",
title = "Harnessing epoch-based reclamation for efficient range
queries",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "14--27",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178489",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent sets with range query operations are highly
desirable in applications such as in-memory databases.
However, few set implementations offer range queries.
Known techniques for augmenting data structures with
range queries (or operations that can be used to build
range queries) have numerous problems that limit their
usefulness. For example, they impose high overhead or
rely heavily on garbage collection. In this work, we
show how to augment data structures with highly
efficient range queries, without relying on garbage
collection. We identify a property of epoch-based
memory reclamation algorithms that makes them ideal for
implementing range queries, and produce three
algorithms, which use locks, transactional memory and
lock-free techniques, respectively. Our algorithms are
applicable to more data structures than previous work,
and are shown to be highly efficient on a large scale
Intel system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Friedman:2018:PLF,
author = "Michal Friedman and Maurice Herlihy and Virendra
Marathe and Erez Petrank",
title = "A persistent lock-free queue for non-volatile memory",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "28--40",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178490",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Non-volatile memory is expected to coexist with (or
even displace) volatile DRAM for main memory in
upcoming architectures. This has led to increasing
interest in the problem of designing and specifying
durable data structures that can recover from system
crashes. Data structures may be designed to satisfy
stricter or weaker durability guarantees to provide a
balance between the strength of the provided guarantees
and performance overhead. This paper proposes three
novel implementations of a concurrent lock-free queue.
These implementations illustrate algorithmic challenges
in building persistent lock-free data structures with
different levels of durability guarantees. In
presenting these challenges, the proposed algorithmic
designs, and the different durability guarantees, we
hope to shed light on ways to build a wide variety of
durable data structures. We implemented the various
designs and compared their performance overhead to a
simple queue design for standard (volatile) memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Wang:2018:SDG,
author = "Linnan Wang and Jinmian Ye and Yiyang Zhao and Wei Wu
and Ang Li and Shuaiwen Leon Song and Zenglin Xu and
Tim Kraska",
title = "Superneurons: dynamic {GPU} memory management for
training deep neural networks",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "41--53",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178491",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Going deeper and wider in neural architectures
improves their accuracy, while the limited GPU DRAM
places an undesired restriction on the network design
domain. Deep Learning (DL) practitioners either need to
change to less desired network architectures, or
nontrivially dissect a network across multiple GPUs. Both
options distract DL practitioners from concentrating on
their original machine learning tasks. We present
SuperNeurons: a dynamic GPU memory scheduling runtime
that enables network training far beyond the GPU DRAM
capacity. SuperNeurons features three memory
optimizations, Liveness Analysis, Unified Tensor Pool,
and Cost-Aware Recomputation; together they effectively
reduce the network-wide peak memory usage down to the
maximal memory usage among layers. We also address the
performance issues in these memory-saving techniques.
Given the limited GPU DRAM, SuperNeurons not only
provisions the necessary memory for training, but also
dynamically allocates the memory for convolution
workspaces to achieve high performance. Evaluations
against Caffe, Torch, MXNet and TensorFlow demonstrate
that SuperNeurons trains networks at least 3.2432 times
deeper than current ones, with leading performance. In
particular, SuperNeurons can train ResNet2500, which has
$10^4$ basic network layers, on a 12 GB K40c.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Belviranli:2018:JDA,
author = "Mehmet E. Belviranli and Seyong Lee and Jeffrey S.
Vetter and Laxmi N. Bhuyan",
title = "{Juggler}: a dependence-aware task-based execution
framework for {GPUs}",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "54--67",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178492",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scientific applications with single instruction,
multiple data (SIMD) computations show considerable
performance improvements when run on today's graphics
processing units (GPUs). However, the existence of data
dependences across thread blocks may significantly
impact the speedup by requiring global synchronization
across streaming multiprocessors (SMs) inside the GPU. To
efficiently run applications with interblock data
dependences, we need fine-granular task-based execution
models that will treat SMs inside a GPU as stand-alone
parallel processing units. Such a scheme will enable
faster execution by utilizing all internal computation
elements inside the GPU and eliminating unnecessary
waits during device-wide global barriers. In this
paper, we propose Juggler, a task-based execution
scheme for GPU workloads with data dependences. The
Juggler framework takes applications embedding OpenMP
4.5 tasks as input and executes them on the GPU via an
efficient in-device runtime, hence eliminating the need
for kernel-wide global synchronization. Juggler
requires little or no modification to the source code,
and once launched, the runtime runs entirely on the GPU
without relying on the host for the entire
execution. We have evaluated Juggler on an NVIDIA Tesla
P100 GPU and obtained up to 31\% performance
improvement over a global-barrier-based
implementation, with minimal runtime overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Kotsifakou:2018:HHP,
author = "Maria Kotsifakou and Prakalp Srivastava and Matthew D.
Sinclair and Rakesh Komuravelli and Vikram Adve and
Sarita Adve",
title = "{HPVM}: heterogeneous parallel virtual machine",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "68--80",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178493",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "We propose a parallel program representation for
heterogeneous systems, designed to enable performance
portability across a wide range of popular parallel
hardware, including GPUs, vector instruction sets,
multicore CPUs and potentially FPGAs. Our
representation, which we call HPVM, is a hierarchical
dataflow graph with shared memory and vector
instructions. HPVM supports three important
capabilities for programming heterogeneous systems: a
compiler intermediate representation (IR), a virtual
instruction set (ISA), and a basis for runtime
scheduling; previous systems focus on only one of these
capabilities. As a compiler IR, HPVM aims to enable
effective code generation and optimization for
heterogeneous systems. As a virtual ISA, it can be used
to ship executable programs, in order to achieve both
functional portability and performance portability
across such systems. At runtime, HPVM enables flexible
scheduling policies, both through the graph structure
and the ability to compile individual nodes in a
program to any of the target devices on a system. We
have implemented a prototype HPVM system, defining the
HPVM IR as an extension of the LLVM compiler IR,
compiler optimizations that operate directly on HPVM
graphs, and code generators that translate the virtual
ISA to NVIDIA GPUs, Intel's AVX vector units, and to
multicore x86-64 processors. Experimental results show
that HPVM optimizations achieve significant performance
improvements, HPVM translators achieve performance
competitive with manually developed OpenCL code for
both GPUs and vector hardware, and that runtime
scheduling policies can make use of both program and
runtime information to exploit the flexible compilation
capabilities. Overall, we conclude that the HPVM
representation is a promising basis for achieving
performance portability and for implementing
parallelizing compilers for heterogeneous parallel
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Guatto:2018:HMM,
author = "Adrien Guatto and Sam Westrick and Ram Raghunathan and
Umut Acar and Matthew Fluet",
title = "Hierarchical memory management for mutable state",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "81--93",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178494",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is well known that modern functional programming
languages are naturally amenable to parallel
programming. Achieving efficient parallelism using
functional languages, however, remains difficult.
Perhaps the most important reason for this is their
lack of support for efficient in-place updates, i.e.,
mutation, which is important for the implementation of
both parallel algorithms and the run-time system
services (e.g., schedulers and synchronization
primitives) used to execute them. In this paper, we
propose techniques for efficient mutation in parallel
functional languages. To this end, we couple the memory
manager with the thread scheduler to make reading and
updating data allocated by nested threads efficient. We
describe the key algorithms behind our technique,
implement them in the MLton Standard ML compiler, and
present an empirical evaluation. Our experiments show
that the approach performs well, significantly
improving efficiency over existing functional language
implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Zhao:2018:BGB,
author = "Yue Zhao and Jiajia Li and Chunhua Liao and Xipeng
Shen",
title = "Bridging the gap between deep learning and sparse
matrix format selection",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "94--108",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178495",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This work presents a systematic exploration on the
promise and special challenges of deep learning for
sparse matrix format selection---a problem of
determining the best storage format for a matrix to
maximize the performance of Sparse Matrix Vector
Multiplication (SpMV). It describes how to effectively
bridge the gap between deep learning and the special
needs of this pillar HPC problem through a set of
techniques on matrix representations, deep learning
structure, and cross-architecture model migrations. The
new solution cuts format selection errors by two
thirds, and improves SpMV performance by 1.73X on
average over the state of the art.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
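%%% The Zhao et al. entry above selects among sparse formats to speed up
%%% SpMV. For reference, a minimal SpMV kernel in CSR, the usual
%%% baseline format among those such selectors choose from (illustrative
%%% C++, not code from the paper):
%%%
%%% #include <cstdio>
%%% #include <vector>
%%%
%%% // y = A*x with A stored in CSR (compressed sparse row) form.
%%% void spmv_csr(const std::vector<int>& rowptr, const std::vector<int>& col,
%%%               const std::vector<double>& val, const std::vector<double>& x,
%%%               std::vector<double>& y) {
%%%   for (size_t i = 0; i + 1 < rowptr.size(); ++i) {
%%%     double sum = 0.0;
%%%     for (int k = rowptr[i]; k < rowptr[i + 1]; ++k)
%%%       sum += val[k] * x[col[k]];
%%%     y[i] = sum;
%%%   }
%%% }
%%%
%%% int main() {
%%%   // The 2x2 matrix [[4, 0], [1, 3]].
%%%   std::vector<int> rowptr{0, 1, 3}, col{0, 0, 1};
%%%   std::vector<double> val{4, 1, 3}, x{1, 2}, y(2);
%%%   spmv_csr(rowptr, col, val, x, y);
%%%   std::printf("%g %g\n", y[0], y[1]);  // prints: 4 7
%%% }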
@Article{Jia:2018:ODW,
author = "Zhen Jia and Aleksandar Zlateski and Fredo Durand and
Kai Li",
title = "Optimizing {$N$}-dimensional, {Winograd}-based
convolution for manycore {CPUs}",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "109--123",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178496",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent work on Winograd-based convolution allows for a
great reduction of computational complexity, but
existing implementations are limited to 2D data and a
single kernel size of 3 by 3. They can achieve only
slightly better, and often worse, performance than
well-optimized direct convolution implementations.
We propose and implement an algorithm for N-dimensional
Winograd-based convolution that allows arbitrary kernel
sizes and is optimized for manycore CPUs. Our algorithm
achieves high hardware utilization through a series of
optimizations. Our experiments show that on modern
ConvNets, our optimized implementation is on average
more than 3x, and sometimes 8x, faster than other
state-of-the-art CPU implementations on an Intel Xeon
Phi manycore processor. Moreover, our implementation
on the Xeon Phi achieves competitive performance for 2D
ConvNets and superior performance for 3D ConvNets,
compared with the best GPU implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Tang:2018:VLF,
author = "Xiongchao Tang and Jidong Zhai and Xuehai Qian and
Bingsheng He and Wei Xue and Wenguang Chen",
title = "{vSensor}: leveraging fixed-workload snippets of
programs for performance variance detection",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "124--136",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178497",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Performance variance becomes increasingly challenging
on current large-scale HPC systems. Even using a fixed
number of computing nodes, the execution time of
several runs can vary significantly. Many parallel
programs executing on supercomputers suffer from such
variance. Performance variance not only causes
unpredictable performance requirement violations, but
also makes it unintuitive to understand the program
behavior. Despite prior efforts, efficient on-line
detection of performance variance remains an open
problem. In this paper, we propose vSensor, a novel
approach for lightweight and on-line performance
variance detection. The key insight is that, instead of
solely relying on an external detector, the source code
of a program itself could reveal the runtime
performance characteristics. Specifically, many
parallel programs contain code snippets that are
executed repeatedly with an invariant quantity of work.
Based on this observation, we use compiler techniques
to automatically identify these fixed-workload snippets
and use them as performance variance sensors
(v-sensors) that enable effective detection. We
evaluate vSensor with a variety of parallel programs on
the Tianhe-2 system. Results show that vSensor can
effectively detect performance variance on HPC systems.
The performance overhead is smaller than 4\% with up to
16,384 processes. In particular, with vSensor, we found
a bad node with slow memory that slowed a program's
performance by 21\%. As a showcase, we also detected a
severe network performance problem that caused a 3.37X
slowdown for an HPC kernel program on the Tianhe-2
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Prokopec:2018:CTC,
author = "Aleksandar Prokopec",
title = "Cache-tries: concurrent lock-free hash tries with
constant-time operations",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "137--151",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178498",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent non-blocking hash tries have good cache
locality and horizontally scalable operations.
However, operations on most existing concurrent hash
tries run in $O(\log n)$ time. In this paper, we show
that concurrent hash trie operations can run in
expected constant time. We present a novel lock-free
concurrent hash trie design that exerts less pressure
on the memory allocator. This hash trie is augmented
with a quiescently consistent cache, which permits the
basic operations to run in expected $O(1)$ time. We show
a statistical analysis for the constant-time bound,
which, to the best of our knowledge, is the first such
proof for hash tries. We also prove the safety,
lock-freedom and linearizability properties. On typical
workloads, our implementation demonstrates up to 5X
performance improvements with respect to the previous
hash trie variants.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Chabbi:2018:FFF,
author = "Milind Chabbi and Shasha Wen and Xu Liu",
title = "Featherlight on-the-fly false-sharing detection",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "152--167",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178499",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Shared-memory parallel programs routinely suffer from
false sharing---a performance degradation caused by
different threads accessing different variables that
reside on the same CPU cacheline, where at least one
variable is modified. State-of-the-art tools detect
false sharing via a heavyweight process of logging
memory accesses and feeding the ensuing access traces
to an offline cache simulator. We have developed
Feather, a lightweight, on-the-fly false-sharing
detection tool. Feather achieves low overhead by
exploiting two hardware features ubiquitous in
commodity CPUs: the performance monitoring units (PMU)
and debug registers. Additionally, Feather is a
first-of-its-kind tool to detect false sharing in
multi-process applications that use shared memory.
Feather allowed us to scale false-sharing detection to
myriad codes. Feather detected several false-sharing
cases in important multi-core and multi-process codes
including previous PPoPP artifacts. Eliminating false
sharing resulted in dramatic (up to 16x) speedups.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
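%%% The Chabbi et al. entry above detects false sharing. A
%%% self-contained C++ demonstration of the effect itself (not of
%%% Feather): two threads update independent counters that either share
%%% a cache line or are padded onto separate lines; the padded layout
%%% typically runs several times faster on commodity hardware.
%%%
%%% #include <atomic>
%%% #include <chrono>
%%% #include <cstdio>
%%% #include <thread>
%%%
%%% struct SharedLine { std::atomic<long> a{0}, b{0}; };      // same cache line
%%% struct Padded {
%%%   alignas(64) std::atomic<long> a{0};                     // separate lines
%%%   alignas(64) std::atomic<long> b{0};
%%% };
%%%
%%% template <class Layout> double run() {
%%%   Layout s;
%%%   auto work = [](std::atomic<long>& c) {
%%%     for (long i = 0; i < 10000000; ++i)
%%%       c.fetch_add(1, std::memory_order_relaxed);
%%%   };
%%%   auto t0 = std::chrono::steady_clock::now();
%%%   std::thread t1(work, std::ref(s.a)), t2(work, std::ref(s.b));
%%%   t1.join(); t2.join();
%%%   return std::chrono::duration<double>(
%%%       std::chrono::steady_clock::now() - t0).count();
%%% }
%%%
%%% int main() {
%%%   std::printf("shared line: %.3f s\n", run<SharedLine>());
%%%   std::printf("padded:      %.3f s\n", run<Padded>());
%%% }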
@Article{Rawat:2018:ROS,
author = "Prashant Singh Rawat and Fabrice Rastello and Aravind
Sukumaran-Rajam and Louis-No{\"e}l Pouchet and Atanas
Rountev and P. Sadayappan",
title = "Register optimizations for stencils on {GPUs}",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "168--182",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178500",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The recent advent of compute-intensive GPU
architecture has allowed application developers to
explore high-order 3D stencils for better computational
accuracy. A common optimization strategy for such
stencils is to expose sufficient data reuse by means
such as loop unrolling, with the expectation of
register-level reuse. However, the resulting code is
often highly constrained by register pressure. While
current state-of-the-art register allocators are
satisfactory for most applications, they are unable to
effectively manage register pressure for such complex
high-order stencils, resulting in sub-optimal code with
a large number of register spills. In this paper, we
develop a statement reordering framework that models
stencil computations as a DAG of trees with shared
leaves, and adapts an optimal scheduling algorithm for
minimizing register usage for expression trees. The
effectiveness of the approach is demonstrated through
experimental results on a range of stencils extracted
from application codes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Zheng:2018:FPS,
author = "Da Zheng and Disa Mhembere and Joshua T. Vogelstein
and Carey E. Priebe and Randal Burns",
title = "{FlashR}: parallelize and scale {R} for machine
learning using {SSDs}",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "183--194",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178501",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "R is one of the most popular programming languages for
statistics and machine learning, but it is slow and
unable to scale to large datasets. The general approach
for having an efficient algorithm in R is to implement
it in C or FORTRAN and provide an R wrapper. FlashR
accelerates and scales existing R code by parallelizing
a large number of matrix functions in the R base
package and scaling them beyond memory capacity with
solid-state drives (SSDs). FlashR performs memory
hierarchy aware execution to speed up parallelized R
code by (i) evaluating matrix operations lazily, (ii)
performing all operations in a DAG in a single
execution and with only one pass over data to increase
the ratio of computation to I/O, (iii) performing two
levels of matrix partitioning and reordering
computation on matrix partitions to reduce data
movement in the memory hierarchy. We evaluate FlashR on
various machine learning and statistics algorithms on
inputs of up to four billion data points. Despite the
huge performance gap between SSDs and RAM, FlashR on
SSDs closely tracks the performance of FlashR in memory
for many algorithms. The R implementations in FlashR
outperform H$_2$O and Spark MLlib by a factor of
3--20.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Menon:2018:DDC,
author = "Harshitha Menon and Kathryn Mohror",
title = "{DisCVar}: discovering critical variables using
algorithmic differentiation for transient faults",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "195--206",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178502",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Aggressive technology scaling trends have made the
hardware of high performance computing (HPC) systems
more susceptible to faults. Some of these faults can
lead to silent data corruption (SDC), and represent a
serious problem because they alter the HPC simulation
results. In this paper, we present a full-coverage,
systematic methodology called D isCVar to identify
critical variables in HPC applications for protection
against SDC. DisCVar uses automatic differentiation
(AD) to determine the sensitivity of the simulation
output to errors in program variables. We empirically
validate our approach in identifying vulnerable
variables by comparing the results against a
full-coverage code-level fault injection campaign. We
find that DisCVar correctly identifies the
variables that are critical to ensure application SDC
resilience with a high degree of accuracy compared to
the results of the fault injection campaign.
Additionally, DisCVar requires only two executions of
the target program to generate results, whereas in our
experiments we needed to perform millions of executions
to get the same information from a fault injection
campaign.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Drachsler-Cohen:2018:PCT,
author = "Dana Drachsler-Cohen and Martin Vechev and Eran
Yahav",
title = "Practical concurrent traversals in search trees",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "207--218",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178503",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Operations of concurrent objects often employ
optimistic concurrency-control schemes that consist of
a traversal followed by a validation step. The
validation checks if concurrent mutations interfered
with the traversal to determine if the operation should
proceed or restart. A fundamental challenge is to
discover a necessary and sufficient validation check
that has to be performed to guarantee correctness. In
this paper, we show a necessary and sufficient
condition for validating traversals in search trees.
The condition relies on a new concept of succinct path
snapshots, which are derived from and embedded in the
structure of the tree. We leverage the condition to
design a general lock-free membership test suitable for
any search tree. We then show how to integrate the
validation condition in update operations of
(non-rebalancing) binary search trees, internal and
external, and AVL trees. We experimentally show that
our new algorithms outperform existing ones.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
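%%% The Drachsler-Cohen et al. entry above concerns validating
%%% optimistic traversals. The generic traverse-then-validate pattern
%%% the paper refines, sketched with a version counter (the paper's
%%% succinct path snapshots are not reproduced here; all names are
%%% illustrative):
%%%
%%% #include <atomic>
%%% #include <cstdio>
%%% #include <thread>
%%%
%%% std::atomic<unsigned> version{0};   // even = stable, odd = writer active
%%% std::atomic<int> data0{0}, data1{0};
%%%
%%% void writer(int v) {
%%%   version.fetch_add(1);             // -> odd: readers must restart
%%%   data0.store(v);
%%%   data1.store(v);
%%%   version.fetch_add(1);             // -> even: publish the new state
%%% }
%%%
%%% bool optimistic_read(int& a, int& b) {
%%%   unsigned v1 = version.load();
%%%   if (v1 & 1) return false;         // writer in progress: restart
%%%   a = data0.load();
%%%   b = data1.load();
%%%   return version.load() == v1;      // validate: nobody interfered
%%% }
%%%
%%% int main() {
%%%   std::thread w(writer, 7);
%%%   int a, b;
%%%   while (!optimistic_read(a, b)) {} // retry until validation succeeds
%%%   w.join();
%%%   std::printf("consistent snapshot: %d %d\n", a, b);  // (0,0) or (7,7)
%%% }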
@Article{Gianinazzi:2018:CAP,
author = "Lukas Gianinazzi and Pavel Kalvoda and Alessandro {De
Palma} and Maciej Besta and Torsten Hoefler",
title = "Communication-avoiding parallel minimum cuts and
connected components",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "219--232",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178504",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present novel scalable parallel algorithms for
finding global minimum cuts and connected components,
which are important and fundamental problems in graph
processing. To take advantage of future massively
parallel architectures, our algorithms are
communication-avoiding: they reduce the costs of
communication across the network and the cache
hierarchy. The fundamental technique underlying our
work is the randomized sparsification of a graph:
removing a fraction of graph edges, deriving a solution
for such a sparsified graph, and using the result to
obtain a solution for the original input. We design and
implement sparsification with $O(1)$ synchronization
steps. Our global minimum cut algorithm decreases
communication costs and computation compared to the
state-of-the-art, while our connected components
algorithm incurs few cache misses and synchronization
steps. We validate our approach by evaluating MPI
implementations of the algorithms on a petascale
supercomputer. We also provide an approximate variant
of the minimum cut algorithm and show that it
approximates the exact solutions well while using a
fraction of cores in a fraction of time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Khyzha:2018:SPT,
author = "Artem Khyzha and Hagit Attiya and Alexey Gotsman and
Noam Rinetzky",
title = "Safe privatization in transactional memory",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "233--245",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178505",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactional memory (TM) facilitates the development
of concurrent applications by letting the programmer
designate certain code blocks as atomic. Programmers
using a TM often would like to access the same data
both inside and outside transactions, e.g., to improve
performance or to support legacy code. In this case,
programmers would ideally like the TM to guarantee
strong atomicity, where transactions can be viewed as
executing atomically also with respect to
non-transactional accesses. Since guaranteeing strong
atomicity for arbitrary programs is prohibitively
expensive, researchers have suggested guaranteeing it
only for certain data-race free (DRF) programs,
particularly those that follow the privatization idiom:
from some point on, threads agree that a given object
can be accessed non-transactionally. Supporting
privatization safely in a TM is nontrivial, because
this often requires correctly inserting transactional
fences, which wait until all active transactions
complete. Unfortunately, there is currently no
consensus on a single definition of transactional DRF,
in particular, because no existing notion of DRF takes
into account transactional fences. In this paper we
propose such a notion and prove that, if a TM satisfies
a certain condition generalizing opacity and a program
using it is DRF assuming strong atomicity, then the
program indeed has strongly atomic semantics. We show
that our DRF notion allows the programmer to use
privatization idioms. We also propose a method for
proving our generalization of opacity and apply it to
the TL2 TM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Grossman:2018:MPB,
author = "Samuel Grossman and Heiner Litz and Christos
Kozyrakis",
title = "Making pull-based graph processing performant",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "246--260",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178506",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graph processing engines following either the
push-based or pull-based pattern conceptually consist
of a two-level nested loop structure. Parallelizing and
vectorizing these loops is critical for high overall
performance and memory bandwidth utilization. Outer
loop parallelization is simple for both engine types
but suffers from high load imbalance. This work focuses
on inner loop parallelization for pull engines, which
when performed naively leads to a significant increase
in conflicting memory writes that must be synchronized.
Our first contribution is a scheduler-aware interface
for parallel loops that allows us to optimize for the
common case in which each thread executes several
consecutive iterations. This eliminates most write
traffic and avoids all synchronization, leading to
speedups of up to 50X. Our second contribution is the
Vector-Sparse format, which addresses the obstacles to
vectorization that stem from the commonly-used
Compressed-Sparse data structure. Our new format
eliminates unaligned memory accesses and bounds checks
within vector operations, two common problems when
processing low-degree vertices. Vectorization with
Vector-Sparse leads to speedups of up to 2.5X. Our
contributions are embodied in Grazelle, a hybrid graph
processing framework. On a server equipped with four
Intel Xeon E7-4850 v3 processors, Grazelle respectively
outperforms Ligra, Polymer, GraphMat, and X-Stream by
up to 15.2X, 4.6X, 4.7X, and 66.8X.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
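%%% The Grossman et al. entry above optimizes for threads executing long
%%% runs of consecutive iterations so writes stay thread-private. A
%%% hedged sketch of that idea with plain threads (not Grazelle's
%%% scheduler interface): each thread owns a contiguous block of
%%% destination vertices in a pull-based update, so the writes need no
%%% atomics or locks.
%%%
%%% #include <cstdio>
%%% #include <thread>
%%% #include <vector>
%%%
%%% int main() {
%%%   // Tiny in-edge CSR: offs/srcs list each vertex's in-neighbors.
%%%   std::vector<int> offs{0, 2, 3, 4}, srcs{1, 2, 0, 0};
%%%   std::vector<double> rank{1.0, 1.0, 1.0}, next(3, 0.0);
%%%
%%%   const int nthreads = 2, n = 3;
%%%   std::vector<std::thread> ts;
%%%   for (int t = 0; t < nthreads; ++t)
%%%     ts.emplace_back([&, t] {
%%%       int lo = t * n / nthreads, hi = (t + 1) * n / nthreads;
%%%       for (int v = lo; v < hi; ++v) {       // consecutive destinations
%%%         double sum = 0.0;
%%%         for (int e = offs[v]; e < offs[v + 1]; ++e)
%%%           sum += rank[srcs[e]];             // pull from in-neighbors
%%%         next[v] = sum;                      // thread-private write: no sync
%%%       }
%%%     });
%%%   for (auto& th : ts) th.join();
%%%   std::printf("%g %g %g\n", next[0], next[1], next[2]);  // 2 1 1
%%% }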
@Article{Jangda:2018:EFT,
author = "Abhinav Jangda and Uday Bondhugula",
title = "An effective fusion and tile size model for optimizing
image processing pipelines",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "261--275",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178507",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Effective models for fusion of loop nests continue to
remain a challenge in both general-purpose and
domain-specific language (DSL) compilers. The
difficulty often arises from the combinatorial
explosion of grouping choices and their interaction
with parallelism and locality. This paper presents a
new fusion algorithm for high-performance
domain-specific compilers for image processing
pipelines. The fusion algorithm is driven by dynamic
programming and explores spaces of fusion possibilities
not covered by previous approaches, guided by a
cost function more concrete and precise in capturing
optimization criteria than prior approaches. The fusion
model is particularly tailored to the transformation
and optimization sequence applied by PolyMage and
Halide, two recent DSLs for image processing pipelines.
Our model-driven technique when implemented in PolyMage
provides significant improvements (up to 4.32X) over
PolyMage's approach (which uses auto-tuning to aid its
model), and over Halide's automatic approach (by up to
2.46X) on two state-of-the-art shared-memory multicore
architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Wang:2018:LLD,
author = "Lei Wang and Liangji Zhuang and Junhang Chen and
Huimin Cui and Fang Lv and Ying Liu and Xiaobing Feng",
title = "{Lazygraph}: lazy data coherency for replicas in
distributed graph-parallel computation",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "276--289",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178508",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Replicas$^1$ of a vertex play an important role in
existing distributed graph processing systems which
make a single vertex to be parallel processed by
multiple machines and access remote neighbors locally
without any remote access. However, replicas of
vertices introduce data coherency problem. Existing
distributed graph systems treat replicas of a vertex v
as an atomic and indivisible vertex, and use an eager
data coherency approach to guarantee replicas
atomicity. In eager data coherency approach, any
changes to vertex data must be immediately communicated
to all replicas of v, thus leading to frequent global
synchronizations and communications. In this paper, we
propose a lazy data coherency approach, called
LazyAsync, which treats replicas of a vertex as
independent vertices and maintains the data coherency
by computations, rather than communications in existing
eager approach. Our approach automatically selects some
data coherency points from the graph algorithm, and
maintains all replicas to share the same global view
only at such points, which means the replicas are
enabled to maintain different local views between any
two adjacent data coherency points. Based on
PowerGraph, we develop a distributed graph processing
system LazyGraph to implement the LazyAsync approach
and exploit graph-aware optimizations. On a 48-node
EC2-like cluster, LazyGraph outperforms PowerGraph on
four widely used graph algorithms across a variety of
real-world graphs, with a speedup ranging from 1.25x to
10.69x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Sun:2018:PPA,
author = "Yihan Sun and Daniel Ferizovic and Guy E. Belloch",
title = "{PAM}: parallel augmented maps",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "290--304",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178509",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Ordered (key-value) maps are an important and
widely-used data type for large-scale data processing
frameworks. Beyond simple search, insertion and
deletion, more advanced operations such as range
extraction, filtering, and bulk updates form a critical
part of these frameworks. We describe an interface for
ordered maps that is augmented to support fast range
queries and sums, and introduce a parallel and
concurrent library called PAM (Parallel Augmented Maps)
that implements the interface. The interface includes a
wide variety of functions on augmented maps ranging
from basic insertion and deletion to more interesting
functions such as union, intersection, filtering,
extracting ranges, splitting, and range-sums. We
describe algorithms for these functions that are
efficient both in theory and practice. As examples of
the use of the interface and the performance of PAM we
apply the library to four applications: simple range
sums, interval trees, 2D range trees, and ranked word
index searching. The interface greatly simplifies the
implementation of these data structures over direct
implementations. Sequentially the code achieves
performance that matches or exceeds existing libraries
designed specially for a single application, and in
parallel our implementation gets speedups ranging from
40 to 90 on 72 cores with 2-way hyperthreading.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
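%%% The Sun et al. entry above augments an ordered map so range sums are
%%% fast. The interface idea in miniature, assuming an illustrative
%%% AugMap/aug_range naming (PAM maintains the augmentation in a
%%% parallel balanced tree; this static sketch uses sorted keys plus
%%% prefix sums only to show what the augmentation buys):
%%%
%%% #include <algorithm>
%%% #include <cstdio>
%%% #include <utility>
%%% #include <vector>
%%%
%%% struct AugMap {
%%%   std::vector<int> keys;      // sorted keys
%%%   std::vector<long> prefix;   // prefix[i] = sum of the first i values
%%%   explicit AugMap(std::vector<std::pair<int, long>> kv) {
%%%     std::sort(kv.begin(), kv.end());
%%%     prefix.push_back(0);
%%%     for (auto& [k, v] : kv) {
%%%       keys.push_back(k);
%%%       prefix.push_back(prefix.back() + v);
%%%     }
%%%   }
%%%   // Sum of values whose keys lie in [lo, hi].
%%%   long aug_range(int lo, int hi) const {
%%%     auto l = std::lower_bound(keys.begin(), keys.end(), lo) - keys.begin();
%%%     auto r = std::upper_bound(keys.begin(), keys.end(), hi) - keys.begin();
%%%     return prefix[r] - prefix[l];
%%%   }
%%% };
%%%
%%% int main() {
%%%   AugMap m({{1, 10}, {3, 30}, {5, 50}, {7, 70}});
%%%   std::printf("%ld\n", m.aug_range(2, 6));  // 30 + 50 = 80
%%% }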
@Article{Fu:2018:ESM,
author = "Zhouwang Fu and Tao Song and Zhengwei Qi and Haibing
Guan",
title = "Efficient shuffle management with {SCache} for {DAG}
computing frameworks",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "305--316",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178510",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In large-scale data-parallel analytics, shuffle, or
the cross-network read and aggregation of partitioned
data between tasks with data dependencies, usually
brings in large overhead. To reduce shuffle overhead,
we present SCache, an open source plug-in system that
particularly focuses on shuffle optimization. By
extracting and analyzing shuffle dependencies prior to
the actual task execution, SCache can adopt heuristic
pre-scheduling combined with shuffle-size prediction
to prefetch shuffle data and balance the load on each
node. Meanwhile, SCache takes full advantage of the
system memory to accelerate the shuffle process. We
have implemented SCache and customized Spark to use it
as the external shuffle service and co-scheduler. The
performance of SCache is evaluated with both
simulations and testbed experiments on a 50-node Amazon
EC2 cluster. Those evaluations have demonstrated that,
by incorporating SCache, the shuffle overhead of Spark
can be reduced by nearly 89\%, and the overall
completion time of TPC-DS queries improves by 40\% on
average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Li:2018:HPG,
author = "Xueqi Li and Guangming Tan and Bingchen Wang and
Ninghui Sun",
title = "High-performance genomic analysis framework with
in-memory computing",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "317--328",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178511",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we propose an in-memory computing
framework (called GPF) that provides a set of genomic
formats, APIs and a fast genomic engine for large-scale
genomic data processing. Our GPF comprises two main
components: (1) scalable genomic data formats and APIs,
and (2) an advanced execution engine that supports
efficient compression of genomic data and eliminates
redundancies in execution. We
further present both system and algorithm-specific
implementations for users to build genomic analysis
pipelines without any acquaintance with Spark parallel
programming. To test the performance of GPF, we built a
WGS pipeline on top of our GPF as a test case. Our
experimental data indicate that GPF completes
Whole-Genome-Sequencing (WGS) analysis of the
146.9-gigabase Human Platinum Genome in 24 minutes,
with over 50\% parallel efficiency when used on 2048
CPU cores. Together, our GPF framework provides a fast
and general engine for large-scale genomic data
processing which supports in-memory computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Liu:2018:GUC,
author = "Yang Liu and Jianguo Wang and Steven Swanson",
title = "{Griffin}: uniting {CPU} and {GPU} in information
retrieval systems for intra-query parallelism",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "327--337",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178512",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Interactive information retrieval services, such as
enterprise search and document search, must provide
relevant results with consistent, low response times in
the face of rapidly growing data sets and query loads.
These growing demands have led researchers to consider
a wide range of optimizations to reduce response
latency, including query processing parallelization and
acceleration with co-processors such as GPUs. However,
previous work runs queries either on GPU or CPU,
ignoring the fact that the best processor for a given
query depends on the query's characteristics, which may
change as the processing proceeds. We present Griffin,
an IR system that dynamically combines GPU- and
CPU-based algorithms to process individual queries
according to their characteristics. Griffin uses
state-of-the-art CPU-based query processing techniques
and incorporates a novel approach to GPU-based query
evaluation. Our GPU-based approach, as far as we know,
achieves the best available GPU search performance by
leveraging a new compression scheme and exploiting an
advanced merge-based intersection algorithm. We
evaluate Griffin with real world queries and datasets,
and show that it improves query performance by 10x
compared to a highly optimized CPU-only implementation,
and 1.5x compared to our GPU approach running alone. We
also find that Griffin helps reduce the 95th-, 99th-,
and 99.9th-percentile query response time by 10.4x,
16.1x, and 26.8x, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Wang:2018:SFS,
author = "Xinliang Wang and Weifeng Liu and Wei Xue and Li Wu",
title = "{swSpTRSV}: a fast sparse triangular solve with sparse
level tile layout on {Sunway} architectures",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "338--353",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178513",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Sparse triangular solve (SpTRSV) is one of the most
important kernels in many real-world applications.
Currently, much research on parallel SpTRSV focuses on
level-set construction for reducing the number of
inter-level synchronizations. However, the
out-of-control data reuse and high cost for global
memory or shared cache access in inter-level
synchronization have been largely neglected in existing
work. In this paper, we propose a novel data layout
called Sparse Level Tile to make all data reuse under
control, and design a Producer-Consumer pairing method
to make any inter-level synchronization only happen in
very fast register communication. We implement our data
layout and algorithms on an SW26010 many-core
processor, which is the main building block of the
fastest supercomputer in the world at present, Sunway
TaihuLight. The experimental results of testing all 2057
square matrices from the Florida Matrix Collection show
that our method achieves an average speedup of 6.9 and a
best speedup of 38.5 over the parallel level-set method.
Our method also outperforms the latest methods on a KNC
many-core processor in 1856 matrices and the latest
methods on a K80 GPU in 1672 matrices, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Wilcox:2018:VVH,
author = "James R. Wilcox and Cormac Flanagan and Stephen N.
Freund",
title = "{VerifiedFT}: a verified, high-performance precise
dynamic race detector",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "354--367",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178514",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic data race detectors are valuable tools for
testing and validating concurrent software, but to
achieve good performance they are typically implemented
using sophisticated concurrent algorithms. Thus, they
are ironically prone to the exact same kind of
concurrency bugs they are designed to detect. To
address these problems, we have developed VerifiedFT,
a clean-slate redesign of the FastTrack race detector
[19]. The VerifiedFT analysis provides the same
precision guarantee as FastTrack, but is simpler to
implement correctly and efficiently, enabling us to
mechanically verify an implementation of its core
algorithm using CIVL [27]. Moreover, VerifiedFT
provides these correctness guarantees without
sacrificing any performance over current
state-of-the-art (but complex and unverified) FastTrack
implementations for Java.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Xu:2018:EPD,
author = "Yifan Xu and I-Ting Angelina Lee and Kunal Agrawal",
title = "Efficient parallel determinacy race detection for
two-dimensional dags",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "368--380",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178515",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A program is said to have a determinacy race if
logically parallel parts of a program access the same
memory location and one of the accesses is a write.
These races are generally bugs in the program since
they lead to non-deterministic program behavior
different schedules of the program can lead to
different results. Most prior work on detecting these
races focuses on a subclass of programs with fork-join
parallelism. This paper presents a race-detection
algorithm, 2D-Order, for detecting races in a more
general class of programs, namely programs whose
dependence structure can be represented as planar dags
embedded in 2D grids. Such dependence structures arise
from programs that use pipelined parallelism or dynamic
programming recurrences. Given a computation with $ T_1
$ work and $ T_\infty $ span, 2D-Order executes it
while also detecting races in $ O(T_1 / P + T_\infty) $
time on $P$ processors, which is asymptotically
optimal. We also implemented PRacer, a race-detection
algorithm based on 2D-Order for Cilk-P, which is a
language for expressing pipeline parallelism. Empirical
results demonstrate that PRacer incurs reasonable
overhead and exhibits scalability similar to the
baseline (executions without race detection) when
running on multiple cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
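%%% The Xu et al. entry above detects determinacy races. The smallest
%%% possible example of one, in C++: two logically parallel writes to
%%% the same location. The atomics keep the program well-defined, yet
%%% the result still depends on the schedule, which is exactly what such
%%% detectors report.
%%%
%%% #include <atomic>
%%% #include <cstdio>
%%% #include <thread>
%%%
%%% int main() {
%%%   std::atomic<int> x{0};
%%%   std::thread a([&] { x.store(1); });  // write
%%%   std::thread b([&] { x.store(2); });  // conflicting parallel write
%%%   a.join(); b.join();
%%%   std::printf("x = %d\n", x.load());   // schedule-dependent: 1 or 2
%%% }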
@Article{Acar:2018:PCM,
author = "Umut A. Acar and Vitaly Aksenov and Arthur
Chargu{\'e}raud and Mike Rainey",
title = "Performance challenges in modular parallel programs",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "381--382",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178516",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Over the past decade, many programming languages and
systems for parallel-computing have been developed,
including Cilk, Fork/Join Java, Habanero Java, Parallel
Haskell, Parallel ML, and X10. Although these systems
raise the level of abstraction at which parallel code
is written, performance continues to require the
programmer to perform extensive optimizations and
tuning, often by taking various architectural details
into account. One such key optimization is granularity
control, which requires the programmer to determine
when and how parallel tasks should be sequentialized.
In this paper, we briefly describe some of the
challenges associated with automatic granularity
control when trying to achieve portable performance for
parallel programs with arbitrary nesting of parallel
constructs. We consider a result from the
functional-programming community, whose starting point
is to consider an ``oracle'' that can predict the work
of parallel codes, and thereby control granularity. We
discuss the challenges in implementing such an oracle
and proving that it has the desired theoretical
properties under the nested-parallel programming
model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Arif:2018:RBP,
author = "Mahwish Arif and Hans Vandierendonck",
title = "Reducing the burden of parallel loop schedulers for
many-core processors",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "383--384",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178517",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This work proposes a low-overhead half-barrier pattern
to schedule fine-grain parallel loops and considers its
integration in the Intel OpenMP and Cilkplus
schedulers. Experimental evaluation demonstrates that
the scheduling overhead of our techniques is 43\% lower
than Intel OpenMP and 12.1x lower than Cilk. We observe
22\% speedup on 48 threads, with a peak of 2.8x
speedup.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Cohen:2018:RTA,
author = "Nachshon Cohen and Erez Petrank and James R. Larus",
title = "Reducing transaction aborts by looking to the future",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "385--386",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178518",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Transactions are widely used in database engines and
they are becoming increasingly useful as a general
synchronization technique for multicore machines [1].
Transactional systems allow a programmer to encapsulate
multiple operations inside a transaction. All these
operations appear to be executed atomically or not at
all.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Correia:2018:STR,
author = "Andreia Correia and Pedro Ramalhete",
title = "Strong trylocks for reader-writer locks",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "387--388",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178519",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A reader-writer lock provides basic methods for shared
and exclusive lock acquisition. A thread calling one of
these methods may have to wait indefinitely to enter
its critical section, with no guarantee of completion.
We present two new reader-writer strong trylock
algorithms, where a call to a trylock method always
completes in a finite number of steps, and is
guaranteed to succeed unless there is a linearizable
history for which another thread has the lock. The
first algorithm, named StrongTryRW, uses a single word
of memory to reach consensus, thus yielding reduced
scalability for readers. To address read scalability,
we designed StrongTryRWRI, which matches the throughput
of current state-of-the-art reader-writer lock
algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
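%%% The Correia--Ramalhete entry above strengthens the trylock
%%% interface. For contrast, the ordinary interface via C++'s
%%% std::shared_mutex, whose try_lock/try_lock_shared may fail
%%% spuriously; the paper's strong trylocks succeed unless a
%%% linearizable history hands the lock to another thread.
%%%
%%% #include <cstdio>
%%% #include <shared_mutex>
%%% #include <thread>
%%%
%%% int main() {
%%%   std::shared_mutex rw;
%%%   rw.lock_shared();                     // a reader holds the lock
%%%   std::thread t([&] {
%%%     bool ok = rw.try_lock();            // writer trylock must fail now
%%%     std::printf("writer trylock while read-held: %s\n",
%%%                 ok ? "ok" : "failed");
%%%     if (ok) rw.unlock();
%%%   });
%%%   t.join();
%%%   rw.unlock_shared();
%%%   if (rw.try_lock()) {                  // lock free: trylock should succeed
%%%     std::printf("writer acquired the free lock\n");
%%%     rw.unlock();
%%%   }
%%% }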
@Article{Dong:2018:SSM,
author = "Yao Dong and Ana Milanova and Julian Dolby",
title = "{SecureMR}: secure mapreduce using homomorphic
encryption and program partitioning",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "389--390",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178520",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In cloud computing customers upload data and
computation to cloud providers. As they cede their data
to the cloud provider, they may cede data
confidentiality. We develop SecureMR, a system that
analyzes and transforms MapReduce programs to operate
over encrypted data. SecureMR makes use of partially
homomorphic encryption and a trusted client. We
evaluate SecureMR on a set of MapReduce benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Firoz:2018:SDV,
author = "Jesun Sahariar Firoz and Marcin Zalewski and Andrew
Lumsdaine",
title = "A scalable distance-1 vertex coloring algorithm for
power-law graphs",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "391--392",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178521",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a distributed, unordered, label-correcting
distance-1 vertex coloring algorithm, called
Distributed Control (DC) coloring algorithm. DC
eliminates the need for vertex-centric barriers and
global synchronization for color refinement, relying
only on atomic operations and local termination
detection to update vertex colors. We implement our DC
coloring algorithm and the well-known Jones-Plassmann
algorithm in the AM++ AMT runtime and compare their
performance. We show that, with runtime support,
eliminating the waiting time of vertex-centric barriers
and investing this time in local ordering results in
better execution times for power-law graphs with dense
local subgraphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Hayashi:2018:SMP,
author = "Koby Hayashi and Grey Ballard and Yujie Jiang and
Michael J. Tobia",
title = "Shared-memory parallelization of {MTTKRP} for dense
tensors",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "393--394",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The matricized-tensor times Khatri--Rao product
(MTTKRP) is the computational bottleneck for algorithms
computing CP decompositions of tensors. In this work,
we develop shared-memory parallel algorithms for MTTKRP
involving dense tensors. The algorithms cast nearly all
of the computation as matrix operations in order to use
optimized BLAS subroutines, and they avoid reordering
tensor entries in memory. We use our parallel
implementation to compute a CP decomposition of a
neuroimaging data set and achieve a speedup of up to
7.4X over existing parallel software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
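
The MTTKRP kernel in the entry above has a convenient matrix formulation: the
mode-0 result equals a tensor unfolding times a Khatri--Rao product, which is
exactly what lets implementations lean on optimized BLAS. A minimal numpy
sketch of that identity (an illustration with invented names, not the paper's
parallel code):

    import numpy as np

    def mttkrp_mode0(X, B, C):
        """M[i, r] = sum over j, k of X[i, j, k] * B[j, r] * C[k, r], via one GEMM."""
        I, J, K = X.shape
        R = B.shape[1]
        # Khatri-Rao product: row j*K + k holds B[j] * C[k] elementwise,
        # matching the row-major flattening of X below.
        kr = (B[:, None, :] * C[None, :, :]).reshape(J * K, R)
        return X.reshape(I, J * K).dot(kr)

    if __name__ == "__main__":
        rng = np.random.default_rng(0)
        X = rng.standard_normal((4, 3, 5))
        B = rng.standard_normal((3, 2))
        C = rng.standard_normal((5, 2))
        assert np.allclose(mttkrp_mode0(X, B, C),
                           np.einsum("ijk,jr,kr->ir", X, B, C))
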
@Article{Jiang:2018:RPS,
author = "Peng Jiang and Gagan Agrawal",
title = "Revealing parallel scans and reductions in sequential
loops through function reconstruction",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "395--396",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178523",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many sequential loops are actually scans or reductions
and can be parallelized across iterations despite the
loop-carried dependences. In this work, we consider the
parallelization of such scan/reduction loops, and
propose a practical runtime approach called
sampling-and-reconstruction to extract the hidden
scan/reduction patterns in these loops.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
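
The key fact behind parallelizing such loops is that a loop-carried
accumulation with an associative combiner is a scan, and a scan decomposes
into independent per-chunk scans plus an offset pass. A minimal chunked
prefix-sum sketch of that decomposition (illustration only, not the paper's
sampling-and-reconstruction technique; CPython threads also do not give real
parallelism here):

    from concurrent.futures import ThreadPoolExecutor
    from itertools import accumulate

    def chunked_prefix_sum(xs, nchunks=4):
        """Scan each chunk independently, then shift by the preceding totals."""
        n = len(xs)
        bounds = [(i * n // nchunks, (i + 1) * n // nchunks) for i in range(nchunks)]
        with ThreadPoolExecutor() as pool:
            local = list(pool.map(lambda b: list(accumulate(xs[b[0]:b[1]])), bounds))
        out, offset = [], 0
        for chunk in local:
            out.extend(v + offset for v in chunk)
            if chunk:
                offset += chunk[-1]
        return out

    if __name__ == "__main__":
        xs = list(range(1, 11))
        assert chunked_prefix_sum(xs) == list(accumulate(xs))
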
@Article{Hong:2018:PMG,
author = "Changwan Hong and Aravind Sukumaran-Rajam and Jinsung
Kim and Prashant Singh Rawat and Sriram Krishnamoorthy
and Louis-No{\"e}l Pouchet and Fabrice Rastello and P.
Sadayappan",
title = "Performance modeling for {GPUs} using abstract kernel
emulation",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "397--398",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178524",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Performance modeling of GPU kernels is a significant
challenge. In this paper, we develop a novel approach
to performance modeling for GPUs through abstract
kernel emulation along with latency/gap modeling of
resources. Experimental results on all benchmarks from
the Rodinia suite demonstrate good accuracy in
predicting execution time on multiple GPU platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
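
A latency/gap model of the kind the abstract mentions bounds kernel time below
by the busiest resource's issue gap plus a latency term. A deliberately tiny
roofline-style sketch (invented numbers and names; the paper's abstract kernel
emulator is far more detailed):

    def predict_time_ns(n_flops, n_bytes, flops_per_ns, bytes_per_ns, latency_ns):
        """Kernel time is bounded by the most contended resource plus latency."""
        gap_compute = n_flops / flops_per_ns    # time if compute-bound
        gap_memory = n_bytes / bytes_per_ns     # time if bandwidth-bound
        return latency_ns + max(gap_compute, gap_memory)

    if __name__ == "__main__":
        # 1e6 flops and 4e6 bytes on a device with 10 flop/ns and 0.5 B/ns:
        print(predict_time_ns(1e6, 4e6, 10.0, 0.5, 1000.0))  # memory-bound case
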
@Article{Jordan:2018:TCD,
author = "Herbert Jordan and Bernhard Scholz and Pavle Subotic",
title = "Two concurrent data structures for efficient datalog
query processing",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "399--400",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178525",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In recent years, Datalog has gained popularity for the
implementation of advanced data analysis. Applications
benefit from Datalog's high-level, declarative syntax
and the availability of efficient algorithms for computing
solutions. The efficiency of Datalog engines has
reached a point where engines such as Souffl{\'e} have
reported performance results comparable to low-level
hand-crafted alternatives [3].",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
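
For orientation, the workload behind such engines is fixpoint evaluation of
rules; the concurrent data structures in the entry above exist to make the
relation insertions and scans inside this loop parallel. A minimal semi-naive
evaluation of transitive closure (illustration only, unrelated to Souffle's
actual internals):

    # path(x, y) :- edge(x, y).
    # path(x, z) :- path(x, y), edge(y, z).
    def transitive_closure(edges):
        path = set(edges)
        delta = set(edges)
        while delta:        # semi-naive: only join facts derived last round
            delta = {(x, z) for (x, y) in delta
                            for (y2, z) in edges if y == y2} - path
            path |= delta
        return path

    if __name__ == "__main__":
        assert transitive_closure({(1, 2), (2, 3)}) == {(1, 2), (2, 3), (1, 3)}
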
@Article{Kerbl:2018:SQW,
author = "Bernhard Kerbl and J{\"o}rg M{\"u}ller and Michael
Kenzel and Dieter Schmalstieg and Markus Steinberger",
title = "A scalable queue for work distribution on {GPUs}",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "401--402",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178526",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Harnessing the power of massively parallel devices
like the graphics processing unit (GPU) is difficult
for algorithms that show dynamic or inhomogeneous
workloads. To achieve high performance, such advanced
algorithms require scalable, concurrent queues to
collect and distribute work. We present a new
concurrent work queue, the Broker Queue, a highly
efficient, linearizable queue for fine-granular work
distribution on the GPU. We evaluate its usability and
benefits in contrast to existing queuing algorithms.
Our queue is up to one order of magnitude faster than
non-blocking queues, and outperforms simpler queue
designs that are unfit for fine-granular work
distribution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Licht:2018:DSF,
author = "Johannes de Fine Licht and Michaela Blott and Torsten
Hoefler",
title = "Designing scalable {FPGA} architectures using
high-level synthesis",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "403--404",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178527",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Massive spatial parallelism at low energy gives FPGAs
the potential to be core components in large scale high
performance computing (HPC) systems. In this paper we
present four major design steps that harness high-level
synthesis (HLS) to implement scalable spatial FPGA
algorithms. To aid productivity, we introduce the open
source library hlslib to complement HLS. We evaluate
kernels designed with our approach on an FPGA
accelerator board, demonstrating high performance and
board utilization with enhanced programmer
productivity. By following our guidelines, programmers
can use HLS to develop efficient parallel algorithms
for FPGA, scaling their implementations with increased
resources on future hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Liu:2018:LLC,
author = "Bo Liu and Wenbin Jiang and Hai Jin and Xuanhua Shi
and Yang Ma",
title = "{Layrub}: layer-centric {GPU} memory reuse and data
migration in extreme-scale deep learning systems",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "405--406",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178528",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Growing accuracy and robustness of Deep Neural
Networks (DNN) models are accompanied by growing model
capacity (going deeper or wider). However, high memory
requirements of those models make it difficult to
execute the training process on one GPU. To address this,
we first identify the memory usage characteristics for
deep and wide convolutional networks, and demonstrate
the opportunities of memory reuse on both intra-layer
and inter-layer levels. We then present Layrub, a
runtime data placement strategy that orchestrates the
execution of the training process. It achieves
layer-centric reuse to reduce memory consumption for
extreme-scale deep learning that cannot be run on a
single GPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Liu:2018:RBI,
author = "Junhong Liu and Xin He and Weifeng Liu and Guangming
Tan",
title = "Register-based implementation of the sparse general
matrix--matrix multiplication on {GPUs}",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "407--408",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178529",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "General sparse matrix--matrix multiplication (SpGEMM)
is an essential building block in a number of
applications. In our work, we fully utilize GPU
registers and shared memory to implement an efficient
and load-balanced SpGEMM in comparison with the
existing implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
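
SpGEMM is usually organized row by row (Gustavson's formulation): each output
row accumulates scaled rows of B. A minimal dict-of-rows sketch of that
structure (illustrative; the paper's contribution is the register and
shared-memory GPU realization, not this loop):

    def spgemm(A, B):
        """A, B: {row: {col: val}} sparse matrices. Returns C = A * B."""
        C = {}
        for i, row in A.items():
            acc = {}
            for k, a_ik in row.items():            # C[i,:] += a_ik * B[k,:]
                for j, b_kj in B.get(k, {}).items():
                    acc[j] = acc.get(j, 0.0) + a_ik * b_kj
            if acc:
                C[i] = acc
        return C

    if __name__ == "__main__":
        A = {0: {0: 1.0, 1: 2.0}, 1: {1: 3.0}}
        B = {0: {1: 4.0}, 1: {0: 5.0}}
        assert spgemm(A, B) == {0: {1: 4.0, 0: 10.0}, 1: {0: 15.0}}
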
@Article{Mururu:2018:QRE,
author = "Girish Mururu and Ada Gavrilovska and Santosh Pande",
title = "Quantifying and reducing execution variance in {STM}
via model driven commit optimization",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "409--410",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178530",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Simplified parallel programming coupled with an
ability to express speculative computation is realized
with Software Transactional Memory (STM). Although STMs
are gaining popularity because of significant
improvements in parallel performance, they exhibit
enormous variation in transaction execution with
non-repeatable performance behavior which is
unacceptable in many application domains, especially those in
which frame rates and responsiveness should be
predictable. Thus, reducing execution variance in STM
is an important performance goal that has been mostly
overlooked. In this work, we minimize the variance in
execution time of threads in STM by reducing the
non-determinism introduced by speculation: we first
quantify the non-determinism and generate an automaton
that models the behavior of the STM. We then use the
automaton to guide the STM toward a less
non-deterministic execution, which reduced the variance
in frame rate by up to 65\% on a version of the
real-world Quake3 game.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Park:2018:TGM,
author = "Jungho Park and Hyungmin Cho and Wookeun Jung and
Jaejin Lee",
title = "Transparent {GPU} memory management for {DNNs}",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "411--412",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern DNN frameworks exploit GPU acceleration by
default to achieve high performance. The limitation of
GPU memory capacity becomes a serious problem because
DNNs are becoming deeper and larger. This paper
proposes a purely software-based transparent solution,
called tvDNN, to the GPU memory capacity problem. It is
based on GPU memory swapping and memory object
sectioning techniques. It also provides an efficient
memory-object swapping schedule based on ILP (optimal)
and heuristics (suboptimal). The experimental results
show that tvDNN enables Caffe to build VGG-16 with a
large batch size, such as 256 or 512, using a few GB of
GPU memory without significant performance
degradation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "Deep Neural Network (DNN)",
remark = "PPoPP '18 proceedings.",
}
@Article{Poter:2018:SIA,
author = "Manuel P{\"o}ter and Jesper Larsson Tr{\"a}ff",
title = "Stamp-it, amortized constant-time memory reclamation
in comparison to five other schemes",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "413--414",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178532",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The memory reclamation problem is to determine, for
any given allocated memory node, when there are no more
references to the node, allowing it to be safely
returned to the memory management system. In a
concurrent context, the memory reclamation problem is
highly non-trivial, since there may be more than one
thread referencing an allocated node unbeknownst to the
other threads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Reif:2018:PSA,
author = "Stefan Reif and Wolfgang Schr{\"o}der-Preikschat",
title = "A predictable synchronisation algorithm",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "415--416",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178533",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Interaction with physical objects often imposes
latency requirements on multi-core embedded systems.
One consequence is the need for synchronisation
algorithms that provide predictable latency, in
addition to high throughput. We present a
synchronisation algorithm that needs at most 7 atomic
memory operations per asynchronous critical section.
Its performance is at least competitive with locks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Riebler:2018:ACA,
author = "Heinrich Riebler and Gavin Vaz and Tobias Kenter and
Christian Plessl",
title = "Automated code acceleration targeting heterogeneous
{OpenCL} devices",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "417--418",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178534",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Accelerators can offer exceptional performance
advantages. However, programmers need to spend
considerable efforts on acceleration, without knowing
how sustainable the employed programming models,
languages and tools are. To tackle this challenge, we
propose and demonstrate a new runtime system called
HTrOP that is able to automatically generate and execute
OpenCL code from sequential CPU code. HTrOP transforms
suitable data-parallel loops into independent
OpenCL-typical work-items and handles concrete calls to
these devices through a mix of library components and
application-specific OpenCL host code. Computational
hotspots are identified and can be offloaded to
different resources (CPU, GPGPU and Xeon Phi). We
demonstrate the potential of HTrOP on a broad set of
applications and are able to improve the performance by
4.3X on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Barrera:2018:GPA,
author = "Isaac S{\'a}nchez Barrera and Marc Casas and Miquel
Moret{\'o} and Eduard Ayguad{\'e} and Jes{\'u}s Labarta
and Mateo Valero",
title = "Graph partitioning applied to {DAG} scheduling to
reduce {NUMA} effects",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "419--420",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178535",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The complexity of shared memory systems is becoming
more relevant as the number of memory domains
increases, with different access latencies and
bandwidth rates depending on the proximity between the
cores and the devices containing the data. In this
context, techniques to manage and mitigate non-uniform
memory access (NUMA) effects consist in migrating
threads, memory pages or both and are typically applied
by the system software. We propose techniques at the
runtime system level to reduce NUMA effects on parallel
applications. We leverage runtime system metadata in
terms of a task dependency graph. Our approach, based
on graph partitioning methods, is able to provide
parallel performance improvements of 1.12X on average
with respect to the state-of-the-art.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Volkov:2018:MSG,
author = "Vasily Volkov",
title = "A microbenchmark to study {GPU} performance models",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "421--422",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178536",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Basic microarchitectural features of NVIDIA GPUs have
been stable for a decade, and many analytic solutions
were proposed to model their performance. We present a
way to review, systematize, and evaluate these
approaches by using a microbenchmark. In this manner,
we produce a brief algebraic summary of key elements of
selected performance models, identify patterns in their
design, and highlight their previously unknown
limitations. Also, we identify a potentially superior
method for estimating performance based on classical
work.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
@Article{Zhao:2018:SCG,
author = "Tuowen Zhao and Mary Hall and Protonu Basu and Samuel
Williams and Hans Johansen",
title = "{SIMD} code generation for stencils on brick
decompositions",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "423--424",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178537",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a stencil library and associated compiler
code generation framework designed to maximize
performance on higher-order stencil computations
through the use of two main technologies: a
fine-grained brick data layout designed to exploit the
inherent multidimensional spatial locality endemic to
stencil computations, and a vector scatter associative
reordering transformation that reduces vector loads and
alignment operations and exposes opportunities for the
backend compiler to reduce computation. For a range of
stencil computations, we compare the generated code
expressed in the brick library to the standard tiled
code. We attain up to a 7.2X speedup on the most
complex stencils when running on an Intel Knights
Landing (Xeon Phi) processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PPoPP '18 proceedings.",
}
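
The brick layout the abstract refers to stores the grid as small contiguous
tiles, so a stencil's neighborhood reads land in a handful of contiguous
blocks rather than strided rows. A minimal numpy round-trip showing only the
layout idea (invented helper names; not the paper's code generator):

    import numpy as np

    BRICK = 4  # brick edge length; grid size must divide evenly here

    def to_bricks(grid):
        """Reorder an n x n array into contiguous BRICK x BRICK tiles."""
        nb = grid.shape[0] // BRICK
        return grid.reshape(nb, BRICK, nb, BRICK).transpose(0, 2, 1, 3).copy()

    def from_bricks(bricks):
        nb = bricks.shape[0]
        return bricks.transpose(0, 2, 1, 3).reshape(nb * BRICK, nb * BRICK)

    if __name__ == "__main__":
        g = np.arange(64.0).reshape(8, 8)
        assert np.array_equal(from_bricks(to_bricks(g)), g)
        # brick (0, 0) is now one contiguous 4 x 4 block in memory:
        assert to_bricks(g)[0, 0].flags["C_CONTIGUOUS"]
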
@Article{Fujiki:2018:MDP,
author = "Daichi Fujiki and Scott Mahlke and Reetuparna Das",
title = "In-Memory Data Parallel Processor",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "1--14",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173171",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recent developments in Non-Volatile Memories (NVMs)
have opened up a new horizon for in-memory computing.
Despite the significant performance gain offered by
computational NVMs, previous works have relied on
manual mapping of specialized kernels to the memory
arrays, making it infeasible to execute more general
workloads. We combat this problem by proposing a
programmable in-memory processor architecture and
data-parallel programming framework. The efficiency of
the proposed in-memory processor comes from two
sources: massive parallelism and reduction in data
movement. A compact instruction set provides
generalized computation capabilities for the memory
array. The proposed programming framework seeks to
leverage the underlying parallelism in the hardware by
merging the concepts of data-flow and vector
processing. To facilitate in-memory programming, we
develop a compilation framework that takes a TensorFlow
input and generates code for our in-memory processor.
Our results demonstrate 7.5x speedup over a multi-core
CPU server for a set of applications from Parsec and
763x speedup over a server-class GPU for a set of
Rodinia benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Fix:2018:HMT,
author = "Jordan Fix and Nayana P. Nagendra and Sotiris
Apostolakis and Hansen Zhang and Sophie Qiu and David
I. August",
title = "Hardware Multithreaded Transactions",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "15--29",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173172",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Speculation with transactional memory systems helps
programmers and compilers produce profitable
thread-level parallel programs. Prior work shows that
supporting transactions that can span multiple threads,
rather than requiring transactions be contained within
a single thread, enables new types of speculative
parallelization techniques for both programmers and
parallelizing compilers. Unfortunately, software
support for multi-threaded transactions (MTXs) comes
with significant additional inter-thread communication
overhead for speculation validation. This overhead can
make otherwise good parallelization unprofitable for
programs with sizeable read and write sets. Some
programs using these prior software MTXs overcame this
problem through significant efforts by expert
programmers to minimize these sets and optimize
communication, capabilities which compiler technology
has been unable to equivalently achieve. Instead, this
paper makes speculative parallelization less laborious
and more feasible through low-overhead speculation
validation, presenting the first complete design,
implementation, and evaluation of hardware MTXs. Even
with maximal speculation validation of every load and
store inside transactions of tens to hundreds of
millions of instructions, profitable parallelization of
complex programs can be achieved. Across 8 benchmarks,
this system achieves a geomean speedup of 99\% over
sequential execution on a multicore machine with 4
cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Kumar:2018:BTF,
author = "Rakesh Kumar and Boris Grot and Vijay Nagarajan",
title = "Blasting through the Front-End Bottleneck with
{Shotgun}",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "30--42",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173178",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The front-end bottleneck is a well-established problem
in server workloads owing to their deep software stacks
and large instruction working sets. Despite years of
research into effective L1-I and BTB prefetching,
state-of-the-art techniques force a trade-off between
performance and metadata storage costs. This work
introduces Shotgun, a BTB-directed front-end prefetcher
powered by a new BTB organization that maintains a
logical map of an application's instruction footprint,
which enables high-efficacy prefetching at low storage
cost. To map active code regions, Shotgun precisely
tracks an application's global control flow (e.g.,
function and trap routine entry points) and summarizes
local control flow within each code region. Because the
local control flow enjoys high spatial locality, with
most functions comprised of a handful of instruction
cache blocks, it lends itself to a compact region-based
encoding. Meanwhile, the global control flow is
naturally captured by the application's unconditional
branch working set (calls, returns, traps). Based on
these insights, Shotgun devotes the bulk of its BTB
capacity to branches responsible for the global control
flow and a spatial encoding of their target regions. By
effectively capturing a map of the application's
instruction footprint in the BTB, Shotgun enables
highly effective BTB-directed prefetching. Using a
storage budget equivalent to a conventional BTB,
Shotgun outperforms the state-of-the-art BTB-directed
front-end prefetcher by up to 14\% on a set of varied
commercial workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Besta:2018:SNL,
author = "Maciej Besta and Syed Minhaj Hassan and Sudhakar
Yalamanchili and Rachata Ausavarungnirun and Onur Mutlu
and Torsten Hoefler",
title = "Slim {NoC}: a Low-Diameter On-Chip Network Topology
for High Energy Efficiency and Scalability",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "43--55",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177158",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging chips with hundreds and thousands of cores
require networks with unprecedented energy/area
efficiency and scalability. To address this, we propose
Slim NoC (SN): a new on-chip network design that
delivers significant improvements in efficiency and
scalability compared to the state-of-the-art. The key
idea is to use two concepts from graph and number
theory, degree-diameter graphs combined with non-prime
finite fields, to enable the smallest number of ports
for a given core count. SN is inspired by
state-of-the-art off-chip topologies; it identifies and
distills their advantages for NoC settings while
solving several key issues that lead to significant
overheads on-chip. SN provides NoC-specific layouts,
which further enhance area/energy efficiency. We show
how to augment SN with state-of-the-art router
microarchitecture schemes such as Elastic Links, to
make the network even more scalable and efficient. Our
extensive experimental evaluations show that SN
outperforms both traditional low-radix topologies
(e.g., meshes and tori) and modern high-radix networks
(e.g., various Flattened Butterflies) in area, latency,
throughput, and static/dynamic power consumption for
both synthetic and real workloads. SN provides a
promising direction in scalable and energy-efficient
NoC topologies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Nguyen:2018:SCM,
author = "Khanh Nguyen and Lu Fang and Christian Navasca and
Guoqing Xu and Brian Demsky and Shan Lu",
title = "{Skyway}: Connecting Managed Heaps in Distributed Big
Data Systems",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "56--69",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173200",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Managed languages such as Java and Scala are
prevalently used in development of large-scale
distributed systems. Under the managed runtime, when
performing data transfer across machines, a task
frequently conducted in a Big Data system, the system
needs to serialize a sea of objects into a byte
sequence before sending them over the network. The
remote node receiving the bytes then deserializes them
back into objects. This process is both
performance-inefficient and labor-intensive: (1) object
serialization/deserialization makes heavy use of
reflection, an expensive runtime operation; and/or (2)
serialization/deserialization functions need to be
hand-written and are error-prone. This paper presents
Skyway, a JVM-based technique that can directly connect
managed heaps of different (local or remote) JVM
processes. Under Skyway, objects in the source heap can
be directly written into a remote heap without changing
their formats. Skyway provides performance benefits to
any JVM-based system by completely eliminating the need
(1) of invoking serialization/deserialization
functions, thus saving CPU time, and (2) of requiring
developers to hand-write serialization functions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Wu:2018:EBJ,
author = "Mingyu Wu and Ziming Zhao and Haoyu Li and Heting Li
and Haibo Chen and Binyu Zang and Haibing Guan",
title = "{Espresso}: Brewing {Java} For More Non-Volatility
with Non-volatile Memory",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "70--83",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173201",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Fast, byte-addressable non-volatile memory (NVM)
embraces both near-DRAM latency and disk-like
persistence, which has generated considerable interests
to revolutionize system software stack and programming
models. However, it is less understood how NVM can be
combined with a managed runtime like the Java virtual
machine (JVM) to ease persistence management. This paper
proposes Espresso, a holistic extension to Java and its
runtime, to enable Java programmers to exploit NVM for
persistence management with high performance. Espresso
first provides a general persistent heap design called
Persistent Java Heap (PJH) to manage persistent data as
normal Java objects. The heap is then strengthened with
a recoverable mechanism to provide crash consistency
for heap metadata. Espresso further provides a new
abstraction called Persistent Java Object (PJO) to
provide an easy-to-use but safe persistence programming
model for programmers to persist application data.
Evaluation confirms that Espresso significantly
outperforms state-of-the-art NVM support for Java (i.e.,
JPA and PCJ) while remaining compatible with data structures
in existing Java programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Wang:2018:ECI,
author = "Wenwen Wang and Stephen McCamant and Antonia Zhai and
Pen-Chung Yew",
title = "Enhancing Cross-{ISA} {DBT} Through Automatically
Learned Translation Rules",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "84--97",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177160",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a novel approach for dynamic
binary translation (DBT) to automatically learn
translation rules from guest and host binaries compiled
from the same source code. The learned translation
rules are then verified via binary symbolic execution
and used in an existing DBT system, QEMU, to generate
more efficient host binary code. Experimental results
on SPEC CINT2006 show that the average time of learning
a translation rule is less than two seconds. With the
rules learned from a collection of benchmark programs
excluding the targeted program itself, an average 1.25X
performance speedup over QEMU can be achieved for SPEC
CINT2006. Moreover, the translation overhead introduced
by this rule-based approach is very small even for
short-running workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Rajadurai:2018:GSL,
author = "Sumanaruban Rajadurai and Jeffrey Bosboom and Weng-Fai
Wong and Saman Amarasinghe",
title = "{Gloss}: Seamless Live Reconfiguration and
Reoptimization of Stream Programs",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "98--112",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173170",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "An important class of applications computes on
long-running or infinite streams of data, often with
known fixed data rates. The latter is referred to as
synchronous data flow (SDF) streams. These stream
applications need to run on clusters or the cloud due
to the high performance requirement. Further, they
require live reconfiguration and reoptimization for
various reasons such as hardware maintenance, elastic
computation, or to respond to fluctuations in resources
or application workload. However, reconfiguration and
reoptimization without downtime while accurately
preserving program state in a distributed environment
is difficult. In this paper, we introduce Gloss, a
suite of compiler and runtime techniques for live
reconfiguration of distributed stream programs. Gloss,
for the first time, avoids periods of zero throughput
during the reconfiguration of both stateless and
stateful SDF based stream programs. Furthermore, unlike
other systems, Gloss globally reoptimizes and
completely recompiles the program during
reconfiguration. This permits it to reoptimize the
application for entirely new configurations that it may
not have encountered before. All these Gloss operations
happen in-situ, requiring no extra hardware resources.
We show how Gloss allows stream programs to reconfigure
and reoptimize with no downtime and minimal overhead,
and demonstrate the wider applicability of it via a
variety of experiments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Yoon:2018:FTB,
author = "Hongil Yoon and Jason Lowe-Power and Gurindar S.
Sohi",
title = "Filtering Translation Bandwidth with Virtual Caching",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "113--127",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173195",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Heterogeneous computing with GPUs integrated on the
same chip as CPUs is ubiquitous, and to increase
programmability many of these systems support virtual
address accesses from GPU hardware. However, this
entails address translation on every memory access. We
observe that future GPUs and workloads show very high
bandwidth demands (up to 4 accesses per cycle in some
cases) for shared address translation hardware due to
frequent private TLB misses. This greatly impacts
performance (32\% average performance degradation
relative to an ideal MMU). To mitigate this overhead,
we propose a software-agnostic, practical, GPU virtual
cache hierarchy. We use the virtual cache hierarchy as
an effective address translation bandwidth filter. We
observe many requests that miss in private TLBs find
corresponding valid data in the GPU cache hierarchy.
With a GPU virtual cache hierarchy, these TLB misses
can be filtered (i.e., virtual cache hits),
significantly reducing bandwidth demands for the shared
address translation hardware. In addition,
accelerator-specific attributes (e.g., less likelihood
of synonyms) of GPUs reduce the design complexity of
virtual caches, making a whole virtual cache hierarchy
(including a shared L2 cache) practical for GPUs. Our
evaluation shows that the entire GPU virtual cache
hierarchy effectively filters the high address
translation bandwidth, achieving almost the same
performance as an ideal MMU. We also evaluate L1-only
virtual cache designs and show that using a whole
virtual cache hierarchy obtains additional performance
benefits (1.31$ \times $ speedup on average).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Maleki:2018:AHP,
author = "Sepideh Maleki and Martin Burtscher",
title = "Automatic Hierarchical Parallelization of Linear
Recurrences",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "128--138",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173168",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Linear recurrences encompass many fundamental
computations including prefix sums and digital filters.
Later result values depend on earlier result values in
recurrences, making it a challenge to compute them in
parallel. We present a new work- and space-efficient
algorithm to compute linear recurrences that is
amenable to automatic parallelization and suitable for
hierarchical massively-parallel architectures such as
GPUs. We implemented our approach in a domain-specific
code generator that emits optimized CUDA code. Our
evaluation shows that, for standard prefix sums and
single-stage IIR filters, the generated code reaches
the throughput of memory copy for large inputs, which
cannot be surpassed. On higher-order prefix sums, it
performs nearly as well as the fastest handwritten code
from the literature. On tuple-based prefix sums and
digital filters, our automatically parallelized code
outperforms the fastest prior implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
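
The reason first-order linear recurrences admit parallel evaluation despite
the loop-carried dependence is that each step y <- a*y + b is an affine map,
and affine maps compose associatively; a scan over the composed maps therefore
computes every prefix. A small sketch of that identity (sequential reference
plus a reduce over the composition; invented names, not the paper's CUDA
generator):

    from functools import reduce

    def compose(f, g):
        """Apply affine map f = (a1, b1), then g = (a2, b2)."""
        a1, b1 = f
        a2, b2 = g
        return (a2 * a1, a2 * b1 + b2)

    def recurrence_seq(a, b, y0=0.0):
        """Reference loop: y[i] = a[i] * y[i-1] + b[i]."""
        ys, y = [], y0
        for ai, bi in zip(a, b):
            y = ai * y + bi
            ys.append(y)
        return ys

    if __name__ == "__main__":
        a, b, y0 = [0.5, 2.0, 1.5], [1.0, -1.0, 0.25], 0.0
        A, B = reduce(compose, zip(a, b))  # associativity permits a tree reduction
        assert abs((A * y0 + B) - recurrence_seq(a, b, y0)[-1]) < 1e-12
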
@Article{Ginsbach:2018:AML,
author = "Philip Ginsbach and Toomas Remmelg and Michel Steuwer
and Bruno Bodin and Christophe Dubach and Michael F. P.
O'Boyle",
title = "Automatic Matching of Legacy Code to Heterogeneous
{APIs}: an Idiomatic Approach",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "139--153",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173182",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Heterogeneous accelerators often disappoint. They
provide the prospect of great performance, but only
deliver it when using vendor specific optimized
libraries or domain specific languages. This requires
considerable legacy code modifications, hindering the
adoption of heterogeneous computing. This paper
develops a novel approach to automatically detect
opportunities for accelerator exploitation. We focus on
calculations that are well supported by established
APIs: sparse and dense linear algebra, stencil codes
and generalized reductions and histograms. We call them
idioms and use a custom constraint-based Idiom
Description Language (IDL) to discover them within user
code. Detected idioms are then mapped to BLAS
libraries, cuSPARSE and clSPARSE and two DSLs: Halide
and Lift. We implemented the approach in LLVM and
evaluated it on the NAS and Parboil sequential C/C++
benchmarks, where we detect 60 idiom instances. In
those cases where idioms are a significant part of the
sequential execution time, we generate code that
achieves 1.26x to over 20x speedup on integrated and
external GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Wang:2018:UAA,
author = "Shu Wang and Chi Li and Henry Hoffmann and Shan Lu and
William Sentosa and Achmad Imam Kistijantoro",
title = "Understanding and Auto-Adjusting Performance-Sensitive
Configurations",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "154--168",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173206",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern software systems are often equipped with
hundreds to thousands of configurations, many of which
greatly affect performance. Unfortunately, properly
setting these configurations is challenging for
developers due to the complex and dynamic nature of
system workload and environment. In this paper, we
first conduct an empirical study to understand
performance-sensitive configurations and the challenges
of setting them in the real-world. Guided by our study,
we design a systematic and general control-theoretic
framework, SmartConf, to automatically set and
dynamically adjust performance-sensitive configurations
to meet required operating constraints while optimizing
other performance metrics. Evaluation shows that
SmartConf is effective in solving real-world
configuration problems, often providing better
performance than even the best static configuration
developers can choose under existing configuration
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Rahmani:2018:SFS,
author = "Amir M. Rahmani and Bryan Donyanavard and Tiago
M{\"u}ck and Kasra Moazzemi and Axel Jantsch and Onur
Mutlu and Nikil Dutt",
title = "{SPECTR}: Formal Supervisory Control and Coordination
for Many-core Systems Resource Management",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "169--183",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173199",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Resource management strategies for many-core systems
need to enable sharing of resources such as power,
processing cores, and memory bandwidth while
coordinating the priority and significance of system-
and application-level objectives at runtime in a
scalable and robust manner. State-of-the-art approaches
use heuristics or machine learning for resource
management, but unfortunately lack formalism in
providing robustness against unexpected corner cases.
While recent efforts deploy classical control-theoretic
approaches with some guarantees and formalism, they
lack scalability and autonomy to meet changing runtime
goals. We present SPECTR, a new resource management
approach for many-core systems that leverages formal
supervisory control theory (SCT) to combine the
strengths of classical control theory with
state-of-the-art heuristic approaches to efficiently
meet changing runtime goals. SPECTR is a scalable and
robust control architecture and a systematic design
flow for hierarchical control of many-core systems.
SPECTR leverages SCT techniques such as gain scheduling
to allow autonomy for individual controllers. It
facilitates automatic synthesis of the high-level
supervisory controller and its property verification.
We implement SPECTR on an Exynos platform containing
ARM's big.LITTLE-based heterogeneous multi-processor
(HMP) and demonstrate that SPECTR's use of SCT is key
to managing multiple interacting resources (e.g., chip
power and processing cores) in the presence of
competing objectives (e.g., satisfying QoS vs. power
capping). The principles of SPECTR are easily
applicable to any resource type and objective as long
as the management problem can be modeled using
dynamical systems theory (e.g., difference equations),
discrete-event dynamic systems, or fuzzy dynamics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Mishra:2018:CLC,
author = "Nikita Mishra and Connor Imes and John D. Lafferty and
Henry Hoffmann",
title = "{CALOREE}: Learning Control for Predictable Latency
and Low Energy",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "184--198",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173184",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many modern computing systems must provide reliable
latency with minimal energy. Two central challenges
arise when allocating system resources to meet these
conflicting goals: (1) complexity: modern hardware
exposes diverse resources with complicated interactions;
and (2) dynamics: latency must be maintained despite
unpredictable changes in operating environment or
input. Machine learning accurately models the latency
of complex, interacting resources, but does not address
system dynamics; control theory adjusts to dynamic
changes, but struggles with complex resource
interaction. We therefore propose CALOREE, a resource
manager that learns key control parameters to meet
latency requirements with minimal energy in complex,
dynamic environments. CALOREE breaks resource
allocation into two sub-tasks: learning how interacting
resources affect speedup, and controlling speedup to
meet latency requirements with minimal energy. CALOREE
defines a general control system whose parameters are
customized by a learning framework while maintaining
control-theoretic formal guarantees that the latency
goal will be met. We test CALOREE's ability to deliver
reliable latency on heterogeneous ARM big.LITTLE
architectures in both single and multi-application
scenarios. Compared to the best prior learning and
control solutions, CALOREE reduces deadline misses by
60\% and energy consumption by 13\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
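
The control half of such a design can be understood from a much simpler loop:
measure latency, compute the error against the target, and nudge a speedup
knob. The toy integral controller below conveys only that shape (invented
gain and names; CALOREE's controller has learned parameters and formal
guarantees that this sketch lacks):

    def control_loop(measure_latency, apply_speedup, target, steps, gain=0.5):
        """Nudge a speedup knob toward the latency target each window."""
        speedup = 1.0
        for _ in range(steps):
            latency = measure_latency(speedup)
            error = latency / target - 1.0        # positive means too slow
            speedup = max(0.1, speedup * (1.0 + gain * error))
            apply_speedup(speedup)

    if __name__ == "__main__":
        base = 200.0    # made-up latency in ms at speedup 1.0
        control_loop(lambda s: base / s, lambda s: print(round(s, 2)),
                     target=100.0, steps=5)
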
@Article{Turakhia:2018:DGC,
author = "Yatish Turakhia and Gill Bejerano and William J.
Dally",
title = "{Darwin}: a Genomics Co-processor Provides up to $ 15
\, 000 \times $ Acceleration on Long Read Assembly",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "199--213",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173193",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Genomics is transforming medicine and our
understanding of life in fundamental ways. Genomics
data, however, is far outpacing Moore's Law.
Third-generation sequencing technologies produce 100X
longer reads than second generation technologies and
reveal a much broader mutation spectrum of disease and
evolution. However, these technologies incur
prohibitively high computational costs. Over 1,300 CPU
hours are required for reference-guided assembly of the
human genome, and over 15,600 CPU hours are required
for de novo assembly. This paper describes ``Darwin''
--- a co-processor for genomic sequence alignment that,
without sacrificing sensitivity, provides up to $ 15 \,
000 \times $ speedup over the state-of-the-art software
for reference-guided assembly of third-generation
reads. Darwin achieves this speedup through
hardware/algorithm co-design, trading more easily
accelerated alignment for less memory-intensive
filtering, and by optimizing the memory system for
filtering. Darwin combines a hardware-accelerated
version of D-SOFT, a novel filtering algorithm, with a
hardware-accelerated version of GACT, a novel alignment
algorithm. GACT generates near-optimal alignments of
arbitrarily long genomic sequences using constant
memory for the compute-intensive step. Darwin is
adaptable, with tunable speed and sensitivity to match
emerging sequencing technologies and to meet the
requirements of genomic applications beyond read
assembly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Zha:2018:LSM,
author = "Yue Zha and Jing Li",
title = "{Liquid Silicon-Monona}: a Reconfigurable
Memory-Oriented Computing Fabric with Scalable
Multi-Context Support",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "214--228",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173167",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "With the recent trend of promoting Field-Programmable
Gate Arrays (FPGAs) to first-class citizens in
accelerating compute-intensive applications in
networking, cloud services and artificial intelligence,
FPGAs face two major challenges in sustaining
competitive advantages in performance and energy
efficiency for diverse cloud workloads: (1) limited
configuration capability for supporting light-weight
computations/on-chip data storage to accelerate
emerging search-/data-intensive applications; and (2) lack
of architectural support to hide reconfiguration
overhead for assisting virtualization in a cloud
computing environment. In this paper, we propose a
reconfigurable memory-oriented computing fabric, namely
Liquid Silicon-Monona (L-Si), enabled by emerging
nonvolatile memory technology i.e. RRAM, to address
these two challenges. Specifically, L-Si addresses the
first challenge by virtue of a new architecture
comprising a 2D array of physically identical but
functionally-configurable building blocks. It, for the
first time, extends the configuration capabilities of
existing FPGAs from computation to the whole spectrum
ranging from computation to data storage. It allows
users to better customize hardware by flexibly
partitioning hardware resources between computation and
memory, greatly benefiting emerging search- and
data-intensive applications. To address the second
challenge, L-Si provides scalable multi-context
architectural support to minimize reconfiguration
overhead for assisting virtualization. In addition, we
provide compiler support to facilitate the programming
of applications written in high-level programming
languages (e.g. OpenCL) and frameworks (e.g.
TensorFlow, MapReduce) while fully exploiting the
unique architectural capability of L-Si. Our evaluation
results show L-Si achieves 99.6\% area reduction, 1.43$
\times $ throughput improvement and 94.0\% power
reduction on search-intensive benchmarks, as compared
with the FPGA baseline. For neural network benchmarks,
on average, L-Si achieves 52.3$ \times $ speedup,
113.9$ \times $ energy reduction and 81\% area
reduction over the FPGA baseline. In addition, the
multi-context architecture of L-Si reduces the context
switching time to $ \sim $ 10 ns, compared with an
off-the-shelf FPGA ($ \sim $ 100 ms), greatly facilitating
virtualization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Achour:2018:TDC,
author = "Sara Achour and Martin Rinard",
title = "Time Dilation and Contraction for Programmable Analog
Devices with {Jaunt}",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "229--242",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173179",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programmable analog devices are a powerful new
computing substrate that are especially appropriate for
performing computationally intensive simulations of
neuromorphic and cytomorphic models. Current state of
the art techniques for configuring analog devices to
simulate dynamical systems do not consider the current
and voltage operating ranges of analog device
components or the sampling limitations of the digital
interface of the device. We present Jaunt, a new solver
that scales the values that configure the analog device
to ensure the resulting analog computation executes
within the operating constraints of the device,
preserves the recoverable dynamics of the original
simulation, and executes slowly enough to observe these
dynamics at the sampled digital outputs. Our results
show that, on a set of benchmark biological
simulations, (1) unscaled configurations produce
incorrect simulations because they violate the
operating ranges of the device and (2) Jaunt delivers
scaled configurations that respect the operating ranges
to produce correct simulations with observable
dynamics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Dai:2018:EDT,
author = "Yuting Dai and Tao Li and Benyong Liu and Mingcong
Song and Huixiang Chen",
title = "Exploiting Dynamic Thermal Energy Harvesting for
Reusing in {Smartphone} with Mobile Applications",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "243--256",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173188",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Recently, mobile applications have gradually become
performance- and resource-intensive, which results in
a massive battery power drain and high surface
temperature, and further degrades the user experience.
Thus, high power consumption and surface over-heating
have been considered as a severe challenge to
smartphone design. In this paper, we propose DTEHR, a
mobile Dynamic Thermal Energy Harvesting Reusing
framework to tackle this challenge. The approach is
sustainable in that it generates energy using dynamic
Thermoelectric Generators (TEGs). The generated energy
not only powers Thermoelectric Coolers (TECs) for
cooling down hot-spots, but also recharges
micro-supercapacitors (MSCs) for extended smartphone
usage. To analyze thermal characteristics and evaluate
DTEHR across real-world applications, we build MPPTAT
(Multi-comPonent Power and Thermal Analysis Tool), a
power and thermal analyzing tool for Android. The
result shows that DTEHR reduces the temperature
differences between hot areas and cold areas up to
15.4${}^\circ $C (internal) and 7${}^\circ $C
(surface). With TEC-based hot-spots cooling, DTEHR
reduces the temperature of the surface and internal
hot-spots by an average of 8${}^\circ $C and
12.8${}^\circ $C, respectively. With dynamic TEGs, DTEHR
generates 2.7--15mW of power, hundreds of times the
power that TECs need to cool down hot-spots. Thus,
extra-generated power can be stored into MSCs to
prolong battery life.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Hu:2018:SDE,
author = "Yongjian Hu and Iulian Neamtiu",
title = "Static Detection of Event-based Races in {Android}
Apps",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "257--270",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173173",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Event-based races are the main source of concurrency
errors in Android apps. Prior approaches for scalable
detection of event-based races have been dynamic. Due
to their dynamic nature, these approaches suffer from
coverage and false negative issues. We introduce a
precise and scalable static approach and tool, named
SIERRA, for detecting Android event-based races. SIERRA
is centered around a new concept of ``concurrency
action'' (that reifies threads, events/messages, system
and user actions) and statically-derived order
(happens-before relation) between actions. Establishing
action order is complicated in Android, and event-based
systems in general, because of externally-orchestrated
control flow, use of callbacks, asynchronous tasks, and
ad-hoc synchronization. We introduce several novel
approaches that enable us to infer order relations
statically: auto-generated code models which impose
order among lifecycle and GUI events; a novel context
abstraction for event-driven programs named
action-sensitivity; and finally, on-demand path
sensitivity via backward symbolic execution to further
rule out false positives. We have evaluated SIERRA on
194 Android apps. Of these, we chose 20 apps for manual
analysis and comparison with a state-of-the-art dynamic
race detector. Experimental results show that SIERRA is
effective and efficient, typically taking 960 seconds
to analyze an app and revealing 43 potential races.
Compared with the dynamic race detector, SIERRA
discovered an average of 29.5 true races with 3.5 false
positives, whereas the dynamic detector only discovered 4
races (hence missing 25.5 races per app) --- this
demonstrates the advantage of a precise static
approach. We believe that our approach opens the way
for precise analysis and static event race detection in
other event-driven systems beyond Android.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
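
The ordering analysis described above can be reduced to a familiar skeleton:
build a happens-before graph over concurrency actions and flag unordered
pairs that touch the same location with at least one write. A minimal sketch
with hypothetical Android-style actions (not SIERRA's analysis, which adds
action-sensitivity and symbolic pruning):

    # Minimal happens-before race sketch (illustrative, not SIERRA).
    # An action is (id, location, is_write); edges are statically
    # derived order, e.g. from lifecycle models.
    from itertools import combinations

    def reachable(edges, a, b):
        seen, stack = set(), [a]
        while stack:
            n = stack.pop()
            if n == b:
                return True
            if n not in seen:
                seen.add(n)
                stack.extend(dst for (src, dst) in edges if src == n)
        return False

    def potential_races(actions, edges):
        for (a, la, wa), (b, lb, wb) in combinations(actions, 2):
            if la == lb and (wa or wb):
                if not reachable(edges, a, b) and not reachable(edges, b, a):
                    yield a, b

    acts = [("onCreate", "x", True), ("onClick1", "x", True),
            ("onClick2", "x", False)]
    hb = [("onCreate", "onClick1"), ("onCreate", "onClick2")]
    print(list(potential_races(acts, hb)))  # [('onClick1', 'onClick2')]
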
@Article{Guo:2018:PCA,
author = "Peizhen Guo and Wenjun Hu",
title = "{Potluck}: Cross-Application Approximate Deduplication
for Computation-Intensive Mobile Applications",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "271--284",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173185",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging mobile applications, such as cognitive
assistance and augmented reality (AR) based gaming, are
increasingly computation-intensive and
latency-sensitive, while running on
resource-constrained devices. The standard approaches
to addressing these challenges involve either offloading to a
cloud(let) or local system optimizations to speed up
the computation, often trading off computation quality
for low latency. Instead, we observe that these
applications often operate on similar input data from
the camera feed and share common processing components,
both within the same (type of) applications and across
different ones. Therefore, deduplicating processing
across applications could deliver the best of both
worlds. In this paper, we present Potluck, a service that
achieves approximate deduplication. At the core of the system is
a cache service that stores and shares processing
results between applications and a set of algorithms to
process the input data to maximize deduplication
opportunities. This is implemented as a background
service on Android. Extensive evaluation shows that
Potluck can reduce the processing latency for our AR
and vision workloads by a factor of 2.5 to 10.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
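
The cache service at Potluck's core can be sketched generically: reuse a
stored result when a new input's features fall within a tolerance of a cached
key, rather than requiring an exact match. The feature vectors and threshold
below are illustrative assumptions, not Potluck's input-matching algorithms.

    # Illustrative approximate-deduplication cache (not Potluck's code):
    # a lookup succeeds when the query is "close enough" to a cached key.
    def l2(a, b):
        return sum((x - y) ** 2 for x, y in zip(a, b)) ** 0.5

    class ApproxCache:
        def __init__(self, tol):
            self.tol, self.entries = tol, []   # list of (features, result)

        def lookup(self, feats):
            best = min(self.entries, key=lambda e: l2(e[0], feats), default=None)
            if best is not None and l2(best[0], feats) <= self.tol:
                return best[1]                 # close enough: dedup hit
            return None

        def insert(self, feats, result):
            self.entries.append((feats, result))

    cache = ApproxCache(tol=0.1)
    cache.insert([0.12, 0.80], "label:cat")    # result computed by one app
    print(cache.lookup([0.13, 0.79]))          # another app reuses it
    print(cache.lookup([0.90, 0.10]))          # too far: recompute (None)
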
@Article{Chong:2018:QCG,
author = "Frederic T. Chong",
title = "Quantum Computing is Getting Real: Architecture, {PL},
and {OS} Roles in Closing the Gap between Quantum
Algorithms and Machines",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "285--285",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177152",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Quantum computing is at an inflection point, where
50-qubit (quantum bit) machines have been built,
100-qubit machines are just around the corner, and even
1000-qubit machines are perhaps only a few years away.
These machines have the potential to fundamentally
change our concept of what is computable and
demonstrate practical applications in areas such as
quantum chemistry, optimization, and quantum
simulation. Yet a significant resource gap remains
between practical quantum algorithms and real machines.
There is an urgent shortage of the necessary computer
scientists to work on software and architectures to
close this gap. I will outline several grand research
challenges in closing this gap, including programming
language design, software and hardware verification,
defining and perforating abstraction boundaries,
cross-layer optimization, managing parallelism and
communication, mapping and scheduling computations,
reducing control complexity, machine-specific
optimizations, learning error patterns, and many more.
I will also describe the resources and infrastructure
available for starting research in quantum computing
and for tackling these challenges.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{DeLozier:2018:SSO,
author = "Christian DeLozier and Ariel Eizenberg and Brandon
Lucia and Joseph Devietti",
title = "{SOFRITAS}: Serializable Ordering-Free Regions for
Increasing Thread Atomicity Scalably",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "286--300",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173192",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Correctly synchronizing multithreaded programs is
challenging and errors can lead to program failures
such as atomicity violations. Existing strong memory
consistency models rule out some possible failures, but
are limited by depending on programmer-defined locking
code. We present the new Ordering-Free Region (OFR)
serializability consistency model that ensures
atomicity for OFRs, which are spans of dynamic
instructions between consecutive ordering constructs
(e.g., barriers), without breaking atomicity at lock
operations. Our platform, Serializable Ordering-Free
Regions for Increasing Thread Atomicity Scalably
(SOFRITAS), ensures a C/C++ program's execution is
equivalent to a serialization of OFRs by default. We
build two systems that realize the SOFRITAS idea: a
concurrency bug finding tool for testing called
SOFRITEST, and a production runtime system called
SOPRO. SOFRITEST uses OFRs to find concurrency bugs,
including a multi-critical-section atomicity violation
in memcached that weaker consistency models will miss.
If OFRs are too coarse-grained, SOFRITEST suggests
refinement annotations automatically. Our software-only
SOPRO implementation has high performance, scales well
with increased parallelism, and prevents failures
despite bugs in locking code. SOFRITAS has an average
overhead of just 1.59x on a single-threaded execution
and 1.51x on sixteen threads, despite pthreads' much
weaker memory model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Markuze:2018:DOF,
author = "Alex Markuze and Igor Smolyar and Adam Morrison and
Dan Tsafrir",
title = "{DAMN}: Overhead-Free {IOMMU} Protection for
Networking",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "301--315",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173175",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "DMA operations can access memory buffers only if they
are ``mapped'' in the IOMMU, so operating systems
protect themselves against malicious/errant network
DMAs by mapping and unmapping each packet immediately
before/after it is DMAed. This approach was recently
found to be riskier and less performant than keeping
packets non-DMAable and instead copying their content
to/from permanently-mapped buffers. Still, the extra
copy hampers performance of multi-gigabit networking.
We observe that achieving protection at the DMA (un)map
boundary is needlessly constraining, as devices must be
prevented from changing the data only after the kernel
reads it. So there is no real need to switch ownership
of buffers between kernel and device at the DMA
(un)mapping layer, as opposed to the approach taken by
all existing IOMMU protection schemes. We thus
eliminate the extra copy by (1)~implementing a new
allocator called DMA-Aware Malloc for Networking
(DAMN), which (de)allocates packet buffers from a
memory pool permanently mapped in the IOMMU;
(2)~modifying the network stack to use this allocator;
and (3)~copying packet data only when the kernel needs
it, which usually morphs the aforementioned extra copy
into the kernel's standard copy operation performed at
the user-kernel boundary. DAMN thus provides full IOMMU
protection with performance comparable to that of an
unprotected system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Boroumand:2018:GWC,
author = "Amirali Boroumand and Saugata Ghose and Youngsok Kim
and Rachata Ausavarungnirun and Eric Shiu and Rahul
Thakur and Daehyun Kim and Aki Kuusela and Allan Knies
and Parthasarathy Ranganathan and Onur Mutlu",
title = "{Google} Workloads for Consumer Devices: Mitigating
Data Movement Bottlenecks",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "316--331",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173177",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We are experiencing an explosive growth in the number
of consumer devices, including smartphones, tablets,
web-based computers such as Chromebooks, and wearable
devices. For this class of devices, energy efficiency
is a first-class concern due to the limited battery
capacity and thermal power budget. We find that data
movement is a major contributor to the total system
energy and execution time in consumer devices. The
energy and performance costs of moving data between the
memory system and the compute units are significantly
higher than the costs of computation. As a result,
addressing data movement is crucial for consumer
devices. In this work, we comprehensively analyze the
energy and performance impact of data movement for
several widely-used Google consumer workloads: (1) the
Chrome web browser; (2) TensorFlow Mobile, Google's
machine learning framework; (3) video playback, and (4)
video capture, both of which are used in many video
services such as YouTube and Google Hangouts. We find
that processing-in-memory (PIM) can significantly
reduce data movement for all of these workloads, by
performing part of the computation close to memory.
Each workload contains simple primitives and functions
that contribute to a significant amount of the overall
data movement. We investigate whether these primitives
and functions are feasible to implement using PIM,
given the limited area and power constraints of
consumer devices. Our analysis shows that offloading
these primitives to PIM logic, consisting of either
simple cores or specialized accelerators, eliminates a
large amount of data movement, and significantly
reduces total system energy (by an average of 55.4\%
across the workloads) and execution time (by an average
of 54.2\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Wen:2018:WSI,
author = "Shasha Wen and Xu Liu and John Byrne and Milind
Chabbi",
title = "Watching for Software Inefficiencies with {Witch}",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "332--347",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177159",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Inefficiencies abound in complex, layered software. A
variety of inefficiencies show up as wasteful memory
operations. Many existing tools instrument every load
and store instruction to monitor memory, which
significantly slows execution and consumes an enormous
amount of extra memory. Our lightweight framework, Witch, samples
consecutive accesses to the same memory location by
exploiting two ubiquitous hardware features: the
performance monitoring units (PMU) and debug registers.
Witch performs no instrumentation. Hence,
witchcraft---tools built atop Witch---can detect a
variety of software inefficiencies while introducing
negligible slowdown and insignificant memory
consumption and yet maintaining accuracy comparable to
exhaustive instrumentation tools. Witch allowed us to
scale our analysis to a large number of code bases.
Guided by witchcraft, we detected several performance
problems in important code bases; eliminating these
inefficiencies resulted in significant speedups.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
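
Among the wasteful memory operations such tools target are dead and silent
stores: consecutive accesses to one address where the second store overwrites
an unread value or rewrites the same value. The trace-based check below only
illustrates that definition; Witch itself samples with PMUs and debug
registers instead of tracing every access.

    # Illustrative dead/silent-store check over a toy access trace
    # (Witch uses PMU sampling plus debug-register watchpoints, not
    # full traces; this only shows what "wasteful" means here).
    def find_waste(trace):                 # trace entries: (op, addr, value)
        last = {}                          # addr -> (op, value)
        for op, addr, val in trace:
            prev = last.get(addr)
            if prev and prev[0] == "store" and op == "store":
                kind = "silent store" if prev[1] == val else "dead store"
                yield kind, addr
            last[addr] = (op, val)

    trace = [("store", 0x10, 1), ("store", 0x10, 1),   # silent store
             ("store", 0x20, 5), ("store", 0x20, 7),   # dead store
             ("store", 0x30, 2), ("load", 0x30, 2)]    # useful
    print(list(find_waste(trace)))
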
@Article{Devecsery:2018:OHA,
author = "David Devecsery and Peter M. Chen and Jason Flinn and
Satish Narayanasamy",
title = "Optimistic Hybrid Analysis: Accelerating Dynamic
Analysis through Predicated Static Analysis",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "348--362",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177153",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic analysis tools, such as those that detect
data-races, verify memory safety, and identify
information flow, have become a vital part of testing
and debugging complex software systems. While these
tools are powerful, their slow speed often limits how
effectively they can be deployed in practice. Hybrid
analysis speeds up these tools by using static analysis
to decrease the work performed during dynamic analysis.
In this paper we argue that current hybrid analysis is
needlessly hampered by an incorrect assumption that
preserving the soundness of dynamic analysis requires
an underlying sound static analysis. We observe that,
even with unsound static analysis, it is possible to
achieve sound dynamic analysis for the executions which
fall within the set of states statically considered.
This leads us to a new approach, called optimistic
hybrid analysis. We first profile a small set of
executions and generate a set of likely invariants that
hold true during most, but not necessarily all,
executions. Next, we apply a much more precise, but
unsound, static analysis that assumes these invariants
hold true. Finally, we run the resulting dynamic
analysis speculatively while verifying whether the
assumed invariants hold true during that particular
execution; if not, the program is reexecuted with a
traditional hybrid analysis. Optimistic hybrid analysis
is as precise and sound as traditional dynamic
analysis, but is typically much faster because (1)
unsound static analysis can speed up dynamic analysis
much more than sound static analysis can and (2)
verifications rarely fail. We apply optimistic hybrid
analysis to race detection and program slicing and
achieve a 1.8x speedup over a state-of-the-art race detector
(FastTrack) optimized with traditional hybrid analysis
and 8.3x over a hybrid backward slicer (Giri).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
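
The profile/assume/verify loop described above can be condensed into a
runnable toy: learn a likely invariant from profiling runs, run a cheap
analysis specialized under it while verifying it, and re-execute with the
conservative analysis on violation. All names and the "analysis" itself are
placeholders, not the paper's implementation.

    # Toy of the optimistic scheme: fast path sound only if a likely
    # (profiled) invariant holds, with an always-sound fallback.
    def profile(runs):
        # likely invariant: all inputs are non-negative
        return all(x >= 0 for run in runs for x in run)

    def fast_analysis(run):          # specialized under the invariant
        for x in run:
            if x < 0:
                return None          # invariant violated: abandon fast path
        return sum(run)

    def slow_analysis(run):          # traditional, always-sound analysis
        return sum(abs(x) for x in run)

    def analyze(run, nonneg_likely):
        if nonneg_likely:
            r = fast_analysis(run)
            if r is not None:
                return r             # verification succeeded: fast result
        return slow_analysis(run)    # re-execute conservatively

    likely = profile([[1, 2], [3, 0]])   # profiling runs
    print(analyze([4, 5], likely))       # fast path: 9
    print(analyze([4, -5], likely))      # verification fails -> fallback: 9
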
@Article{Katz:2018:SRC,
author = "Omer Katz and Noam Rinetzky and Eran Yahav",
title = "Statistical Reconstruction of Class Hierarchies in
Binaries",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "363--376",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173202",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We address a fundamental problem in reverse
engineering of object-oriented code: the reconstruction
of a program's class hierarchy from its stripped
binary. Existing approaches rely heavily on structural
information that is not always available, e.g., calls
to parent constructors. As a result, these approaches
often leave gaps in the hierarchies they construct, or
fail to construct them altogether. Our main insight is
that behavioral information can be used to infer
subclass/superclass relations, supplementing any
missing structural information. Thus, we propose the
first statistical approach for static reconstruction of
class hierarchies based on behavioral similarity. We
capture the behavior of each type using a statistical
language model (SLM), define a metric for pairwise
similarity between types based on the Kullback--Leibler
divergence between their SLMs, and lift it to determine
the most likely class hierarchy. We implemented our
approach in a tool called ROCK and used it to
automatically reconstruct the class hierarchies of
several real-world stripped C++ binaries. Our results
demonstrate that ROCK obtained significantly more
accurate class hierarchies than those obtained using
structural analysis alone.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
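
The similarity metric named above is the Kullback--Leibler divergence,
D(P||Q) = sum_i P(i) log(P(i)/Q(i)). A direct sketch over toy behavior
distributions follows; the event alphabet and smoothing constant are
illustrative assumptions, not ROCK's SLM features.

    # KL divergence between two type-behavior distributions
    # (the events and smoothing epsilon are made up for illustration).
    from math import log

    def kl(p, q, eps=1e-9):
        keys = set(p) | set(q)
        return sum(p.get(k, eps) * log(p.get(k, eps) / q.get(k, eps))
                   for k in keys)

    base    = {"call:draw": 0.7, "call:resize": 0.3}
    derived = {"call:draw": 0.6, "call:resize": 0.3, "call:scroll": 0.1}
    other   = {"call:hash": 0.9, "call:cmp": 0.1}

    # Lower divergence suggests behavioral similarity (candidate relatives).
    print(kl(base, derived), kl(base, other))
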
@Article{Rigger:2018:STA,
author = "Manuel Rigger and Roland Schatz and Ren{\'e} Mayrhofer
and Matthias Grimmer and Hanspeter M{\"o}ssenb{\"o}ck",
title = "{Sulong}, and Thanks for All the Bugs: Finding Errors
in {C} Programs by Abstracting from the Native
Execution Model",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "377--391",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173174",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In C, memory errors, such as buffer overflows, are
among the most dangerous software errors; as we show,
they are still on the rise. Current dynamic bug-finding
tools that try to detect such errors are based on the
low-level execution model of the underlying machine.
They insert additional checks in an ad-hoc fashion,
which makes them prone to omitting checks for corner
cases. To address this, we devised a novel approach to
finding bugs during the execution of a program. At the
core of this approach is an interpreter written in a
high-level language that performs automatic checks
(such as bounds, NULL, and type checks). By mapping
data structures in C to those of the high-level
language, accesses are automatically checked and bugs
discovered. We have implemented this approach and show
that our tool (called Safe Sulong) can find bugs that
state-of-the-art tools overlook, such as out-of-bounds
accesses to the main function arguments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
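
The key idea above, mapping C data structures onto a managed runtime so every
access is checked automatically, can be illustrated with a checked-array
wrapper (a sketch of the principle only, not Sulong itself):

    # Sketch of the idea behind Safe Sulong (not its implementation):
    # represent a C array as a managed object so every access is
    # automatically bounds-checked by the host runtime.
    class CheckedArray:
        def __init__(self, n):
            self._data = [0] * n

        def __getitem__(self, i):
            if not 0 <= i < len(self._data):
                raise IndexError("out-of-bounds read at index %d" % i)
            return self._data[i]

        def __setitem__(self, i, v):
            if not 0 <= i < len(self._data):
                raise IndexError("out-of-bounds write at index %d" % i)
            self._data[i] = v

    argv = CheckedArray(2)      # models 'char *argv[argc]'
    argv[1] = 42
    try:
        argv[2]                 # the main-arguments bug class cited above
    except IndexError as e:
        print("bug found:", e)
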
@Article{David:2018:FPS,
author = "Yaniv David and Nimrod Partush and Eran Yahav",
title = "{FirmUp}: Precise Static Detection of Common
Vulnerabilities in Firmware",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "392--404",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177157",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a static, precise, and scalable technique
for finding CVEs (Common Vulnerabilities and Exposures)
in stripped firmware images. Our technique is able to
efficiently find vulnerabilities in real-world firmware
with high accuracy. Given a vulnerable procedure in an
executable binary and a firmware image containing
multiple stripped binaries, our goal is to detect
possible occurrences of the vulnerable procedure in the
firmware image. Due to the variety of architectures and
unique tool chains used by vendors, as well as the
highly customized nature of firmware, identifying
procedures in stripped firmware is extremely
challenging. Vulnerability detection requires not only
pairwise similarity between procedures but also
information about the relationships between procedures
in the surrounding executable. This observation serves
as the foundation for a novel technique that
establishes a partial correspondence between procedures
in the two binaries. We implemented our technique in a
tool called FirmUp and performed an extensive
evaluation over 40 million procedures across 4 different
prevalent architectures, crawled from public vendor
firmware images. We discovered 373 vulnerabilities
affecting publicly available firmware, 147 of them in
the latest available firmware version for the device. A
thorough comparison of FirmUp to previous methods shows
that it accurately and effectively finds
vulnerabilities in firmware, while outperforming the
detection rate of the state of the art by 45\% on
average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Alglave:2018:FSC,
author = "Jade Alglave and Luc Maranget and Paul E. McKenney and
Andrea Parri and Alan Stern",
title = "Frightening Small Children and Disconcerting
Grown-ups: Concurrency in the {Linux} Kernel",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "405--418",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177156",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "Concurrency in the Linux kernel can be a contentious
topic. The Linux kernel mailing list features numerous
discussions related to consistency models, including
those of the more than 30 CPU architectures supported
by the kernel and that of the kernel itself. How are
Linux programs supposed to behave? Do they behave
correctly on exotic hardware? A formal model can help
address such questions. Better yet, an executable model
allows programmers to experiment with the model to
develop their intuition. Thus we offer a model written
in the cat language, making it not only formal, but
also executable by the herd simulator. We tested our
model against hardware and refined it in consultation
with maintainers. Finally, we formalised the
fundamental law of the Read-Copy-Update synchronisation
mechanism, and proved that one of its implementations
satisfies this law.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
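
What an executable memory model buys is mechanical enumeration of allowed
outcomes. The toy below (plain Python, not the cat/herd model) brute-forces
interleavings of the classic message-passing idiom and shows that letting the
writer's stores reorder admits the outcome r0=1, r1=0 that sequential
consistency forbids.

    # Toy outcome enumeration for the message-passing idiom
    # (illustrative; the real model is written in cat and run by herd).
    from itertools import permutations

    P1 = [("ld", "r0", "y"), ("ld", "r1", "x")]   # reader, program order

    def outcomes(p0):
        res = set()
        for pick in set(permutations([0, 0, 1, 1])):  # all interleavings
            mem, regs, idx = {"x": 0, "y": 0}, {}, [0, 0]
            for t in pick:
                op = (p0, P1)[t][idx[t]]
                idx[t] += 1
                if op[0] == "st":
                    mem[op[1]] = op[2]
                else:
                    regs[op[1]] = mem[op[2]]
            res.add((regs["r0"], regs["r1"]))
        return res

    writer = [("st", "x", 1), ("st", "y", 1)]
    print(outcomes(writer))                  # no (1, 0) outcome
    print(outcomes(list(reversed(writer))))  # stores reordered: (1, 0) appears
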
@Article{Liu:2018:FAD,
author = "Haopeng Liu and Xu Wang and Guangpu Li and Shan Lu and
Feng Ye and Chen Tian",
title = "{FCatch}: Automatically Detecting Time-of-fault Bugs
in Cloud Systems",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "419--431",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177161",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "It is crucial for distributed systems to achieve high
availability. Unfortunately, this is challenging given
the common component failures (i.e., faults).
Developers often cannot anticipate all the timing
conditions and system states under which a fault might
occur, and thus introduce time-of-fault (TOF) bugs that only
manifest when a node crashes or a message drops at a
special moment. Although challenging, detecting TOF
bugs is fundamental to developing highly available
distributed systems. Unlike previous work that relies
on fault injection to expose TOF bugs, this paper
carefully models TOF bugs as a new type of concurrency
bugs, and develops FCatch to automatically predict TOF
bugs by observing correct execution. Evaluation on
representative cloud systems shows that FCatch is
effective, accurately finding severe TOF bugs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Deiana:2018:UPN,
author = "Enrico A. Deiana and Vincent St-Amour and Peter A.
Dinda and Nikos Hardavellas and Simone Campanoni",
title = "Unconventional Parallelization of Nondeterministic
Applications",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "432--447",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173181",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The demand for thread-level-parallelism (TLP) on
commodity processors is endless as it is essential for
gaining performance and saving energy. However, TLP in
today's programs is limited by dependences that must be
satisfied at run time. We have found that for
nondeterministic programs, some of these actual
dependences can be satisfied with alternative data that
can be generated in parallel, thus boosting the
program's TLP. Satisfying these dependences with
alternative data nonetheless produces final outputs
that match those of the original nondeterministic
program. To demonstrate the practicality of our
technique, we describe the design, implementation, and
evaluation of our compilers, autotuner, profiler, and
runtime, which are enabled by our proposed C++
programming language extensions. The resulting system
boosts the performance of six well-known
nondeterministic and multi-threaded benchmarks by
158.2\% (geometric mean) on a 28-core Intel-based
platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Ji:2018:BGB,
author = "Yu Ji and Youhui Zhang and Wenguang Chen and Yuan
Xie",
title = "Bridge the Gap between Neural Networks and
Neuromorphic Hardware with a Neural Network Compiler",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "448--460",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173205",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Different from developing neural networks (NNs) for
general-purpose processors, the development for NN
chips usually faces some hardware-specific
restrictions, such as limited precision of network
signals and parameters, constrained computation scale,
and limited types of non-linear functions. This paper
proposes a general methodology to address the
challenges. We decouple the NN applications from the
target hardware by introducing a compiler that can
transform an existing trained, unrestricted NN into an
equivalent network that meets the given hardware's
constraints. We propose multiple techniques to make the
transformation adaptable to different kinds of NN
chips, and reliable under restrictive hardware constraints.
We have built such a software tool that supports both
spiking neural networks (SNNs) and traditional
artificial neural networks (ANNs). We have demonstrated
its effectiveness with a fabricated neuromorphic chip
and a processing-in-memory (PIM) design. Tests show
that the inference error caused by this solution is
insignificant and the transformation time is much
shorter than the retraining time. Also, we have performed
parameter-sensitivity evaluations to explore the
tradeoffs between network error and resource
utilization for different transformation strategies,
which could provide insights for co-design optimization
of neuromorphic hardware and software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Kwon:2018:MEF,
author = "Hyoukjun Kwon and Ananda Samajdar and Tushar Krishna",
title = "{MAERI}: Enabling Flexible Dataflow Mapping over {DNN}
Accelerators via Reconfigurable Interconnects",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "461--475",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173176",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Deep neural networks (DNN) have demonstrated highly
promising results across computer vision and speech
recognition, and are becoming foundational for
ubiquitous AI. The computational complexity of these
algorithms and a need for high energy-efficiency have
led to a surge in research on hardware accelerators.
To reduce the latency and energy
costs of accessing DRAM, most DNN accelerators are
spatial in nature, with hundreds of processing elements
(PE) operating in parallel and communicating with each
other directly. DNNs are evolving at a rapid rate, and
it is common to have convolution, recurrent, pooling,
and fully-connected layers with varying input and
filter sizes in the most recent topologies. They may be
dense or sparse. They can also be partitioned in myriad
ways (within and across layers) to exploit data reuse
(weights and intermediate outputs). All of the above
can lead to different dataflow patterns within the
accelerator substrate. Unfortunately, most DNN
accelerators support only fixed dataflow patterns
internally as they perform a careful co-design of the
PEs and the network-on-chip (NoC). In fact, the
majority of them are only optimized for traffic within
a convolutional layer. This makes it challenging to map
arbitrary dataflows on the fabric efficiently, and can
lead to underutilization of the available compute
resources. DNN accelerators need to be programmable to
enable mass deployment. For them to be programmable,
they need to be configurable internally to support the
various dataflow patterns that could be mapped over
them. To address this need, we present MAERI, which is
a DNN accelerator built with a set of modular and
configurable building blocks that can easily support
myriad DNN partitions and mappings by appropriately
configuring tiny switches. MAERI provides 8--459\%
better utilization across multiple dataflow mappings
over baselines with rigid NoC fabrics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Cai:2018:VHA,
author = "Ruizhe Cai and Ao Ren and Ning Liu and Caiwen Ding and
Luhao Wang and Xuehai Qian and Massoud Pedram and
Yanzhi Wang",
title = "{VIBNN}: Hardware Acceleration of {Bayesian} Neural
Networks",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "476--488",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173212",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Bayesian Neural Networks (BNNs) have been proposed to
address the problem of model uncertainty in training
and inference. By introducing weights associated with
conditioned probability distributions, BNNs are capable
of resolving the overfitting issue commonly seen in
conventional neural networks and allow for small-data
training, through the variational inference process.
Frequent usage of Gaussian random variables in this
process requires a properly optimized Gaussian Random
Number Generator (GRNG). The high hardware cost of
conventional GRNG makes the hardware implementation of
BNNs challenging. In this paper, we propose VIBNN, an
FPGA-based hardware accelerator design for variational
inference on BNNs. We explore the design space for
the massive number of Gaussian variable sampling tasks in
BNNs. Specifically, we introduce two high performance
Gaussian (pseudo) random number generators: (1) the
RAM-based Linear Feedback Gaussian Random Number
Generator (RLF-GRNG), which is inspired by the
properties of binomial distribution and linear feedback
logics; and (2) the Bayesian Neural Network-oriented
Wallace Gaussian Random Number Generator. To achieve
high scalability and efficient memory access, we
propose a deep pipelined accelerator architecture with
fast execution and good hardware utilization.
Experimental results demonstrate that the proposed
VIBNN implementations on an FPGA can achieve throughput
of 321,543.4 Images/s and energy efficiency of up to
52,694.8 Images/J while maintaining accuracy similar to
its software counterpart.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
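
The binomial property behind the RLF-GRNG: a sum of n independent fair bits
is Binomial(n, 1/2), which for large n approximates a Gaussian of mean n/2
and variance n/4. A software sketch of that construction follows (the
hardware design drives the bits from linear feedback logic, not Python's
RNG):

    # Central-limit sketch behind binomial-based Gaussian generation.
    import random

    def gaussian_sample(n=64):
        s = sum(random.getrandbits(1) for _ in range(n))   # Binomial(n, 1/2)
        return (s - n / 2) / (n / 4) ** 0.5                # ~ N(0, 1)

    xs = [gaussian_sample() for _ in range(10000)]
    mean = sum(xs) / len(xs)
    var = sum((x - mean) ** 2 for x in xs) / len(xs)
    print(round(mean, 2), round(var, 2))                   # approx 0.0, 1.0
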
@Article{Sadrosadati:2018:LEH,
author = "Mohammad Sadrosadati and Amirhossein Mirhosseini and
Seyed Borna Ehsani and Hamid Sarbazi-Azad and Mario
Drumond and Babak Falsafi and Rachata Ausavarungnirun
and Onur Mutlu",
title = "{LTRF}: Enabling High-Capacity Register Files for
{GPUs} via Hardware\slash Software Cooperative Register
Prefetching",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "489--502",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173211",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graphics Processing Units (GPUs) employ large register
files to accommodate all active threads and accelerate
context switching. Unfortunately, register files are a
scalability bottleneck for future GPUs due to long
access latency, high power consumption, and large
silicon area provisioning. Prior work proposes a
hierarchical register file to reduce register file
power consumption by caching registers in a smaller
register file cache. Unfortunately, this approach does
not improve register access latency due to the low hit
rate in the register file cache. In this paper, we
propose the Latency-Tolerant Register File (LTRF)
architecture to achieve low latency in a two-level
hierarchical structure while keeping power consumption
low. We observe that compile-time interval analysis
enables us to divide GPU program execution into
intervals with an accurate estimate of a warp's
aggregate register working-set within each interval.
The key idea of LTRF is to prefetch the estimated
register working-set from the main register file to the
register file cache under software control, at the
beginning of each interval, and overlap the prefetch
latency with the execution of other warps. Our
experimental results show that LTRF enables
high-capacity yet long-latency main GPU register files,
paving the way for various optimizations. As an example
optimization, we implement the main register file with
emerging high-density high-latency memory technologies,
enabling 8X larger capacity and improving overall GPU
performance by 31\% while reducing register file power
consumption by 46\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Ausavarungnirun:2018:MRG,
author = "Rachata Ausavarungnirun and Vance Miller and Joshua
Landgraf and Saugata Ghose and Jayneel Gandhi and
Adwait Jog and Christopher J. Rossbach and Onur Mutlu",
title = "{MASK}: Redesigning the {GPU} Memory Hierarchy to
Support Multi-Application Concurrency",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "503--518",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173169",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graphics Processing Units (GPUs) exploit large amounts
of thread-level parallelism to provide high instruction
throughput and to efficiently hide long-latency stalls.
The resulting high throughput, along with continued
programmability improvements, has made GPUs an
essential computational resource in many domains.
Applications from different domains can have vastly
different compute and memory demands on the GPU. In a
large-scale computing environment, to efficiently
accommodate such wide-ranging demands without leaving
GPU resources underutilized, multiple applications can
share a single GPU, akin to how multiple applications
execute concurrently on a CPU. Multi-application
concurrency requires several support mechanisms in both
hardware and software. One such key mechanism is
virtual memory, which manages and protects the address
space of each application. However, modern GPUs lack
the extensive support for multi-application concurrency
available in CPUs, and as a result suffer from high
performance overheads when shared by multiple
applications, as we demonstrate. We perform a detailed
analysis of which multi-application concurrency support
limitations hurt GPU performance the most. We find that
the poor performance is largely a result of the virtual
memory mechanisms employed in modern GPUs. In
particular, poor address translation performance is a
key obstacle to efficient GPU sharing. State-of-the-art
address translation mechanisms, which were designed for
single-application execution, experience significant
inter-application interference when multiple
applications spatially share the GPU. This contention
leads to frequent misses in the shared translation
lookaside buffer (TLB), where a single miss can induce
long-latency stalls for hundreds of threads. As a
result, the GPU often cannot schedule enough threads to
successfully hide the stalls, which diminishes system
throughput and becomes a first-order performance
concern. Based on our analysis, we propose MASK, a new
GPU framework that provides low-overhead virtual memory
support for the concurrent execution of multiple
applications. MASK consists of three novel
address-translation-aware cache and memory management
mechanisms that work together to largely reduce the
overhead of address translation: (1) a token-based
technique to reduce TLB contention, (2) a bypassing
mechanism to improve the effectiveness of cached
address translations, and (3) an application-aware
memory scheduling scheme to reduce the interference
between address translation and data requests. Our
evaluations show that MASK restores much of the
throughput lost to TLB contention. Relative to a
state-of-the-art GPU TLB, MASK improves system
throughput by 57.8\%, improves IPC throughput by
43.4\%, and reduces application-level unfairness by
22.4\%. MASK's system throughput is within 23.2\% of an
ideal GPU system with no address translation
overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Yao:2018:SSG,
author = "Zhihao Yao and Zongheng Ma and Yingtong Liu and
Ardalan Amiri Sani and Aparna Chandramowlishwaran",
title = "{Sugar}: Secure {GPU} Acceleration in {Web} Browsers",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "519--534",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173186",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Modern personal computers have embraced increasingly
powerful Graphics Processing Units (GPUs). Recently,
GPU-based graphics acceleration in web apps (i.e.,
applications running inside a web browser) has become
popular. WebGL is the main effort to provide
OpenGL-like graphics for web apps and it is currently
used in 53\% of the top-100 websites. Unfortunately,
WebGL has posed serious security concerns as several
attack vectors have been demonstrated through WebGL.
Web browsers' solutions to these attacks have been
reactive: discovered vulnerabilities have been patched
and new runtime security checks have been added.
Unfortunately, this approach leaves the system
vulnerable to zero-day vulnerability exploits,
especially given the large size of the Trusted
Computing Base of the graphics plane. We present Sugar,
a novel operating system solution that enhances the
security of GPU acceleration for web apps by design.
The key idea behind Sugar is using a dedicated virtual
graphics plane for a web app by leveraging modern GPU
virtualization solutions. A virtual graphics plane
consists of a dedicated virtual GPU (or vGPU) as well
as all the software graphics stack (including the
device driver). Sugar enhances the system security
since a virtual graphics plane is fully isolated from
the rest of the system. Despite GPU virtualization
overhead, we show that Sugar achieves high performance.
Moreover, unlike current systems, Sugar is able to use
two underlying physical GPUs, when available, to
co-render the User Interface (UI): one GPU is used to
provide virtual graphics planes for web apps and the
other to provide the primary graphics plane for the
rest of the system. Such a design not only provides
strong security guarantees but also enhanced
performance isolation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Hsu:2018:SRP,
author = "Chang-Hong Hsu and Qingyuan Deng and Jason Mars and
Lingjia Tang",
title = "{SmoothOperator}: Reducing Power Fragmentation and
Improving Power Utilization in Large-scale
Datacenters",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "535--548",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173190",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "With the ever growing popularity of cloud computing
and web services, Internet companies are in need of
increased computing capacity to serve the demand.
However, power has become a major limiting factor
prohibiting the growth in industry: it is often the
case that no more servers can be added to datacenters
without surpassing the capacity of the existing power
infrastructure. In this work, we first investigate the
power utilization in Facebook datacenters. We observe
that the combination of provisioning for peak power
usage, highly fluctuating traffic, and multi-level
power delivery infrastructure leads to a significant
power budget fragmentation problem and inefficiently
low power utilization. To address this issue, our
insight is that heterogeneity of power consumption
patterns among different services provides
opportunities to re-shape the power profile of each
power node by re-distributing services. By grouping
services with asynchronous peak times under the same
power node, we can reduce the peak power of each node,
creating more power headroom that allows more servers
to be hosted, achieving higher throughput. Based on
this insight, we develop a workload-aware service
placement framework to systematically spread the
service instances with synchronous power patterns
evenly under the power supply tree, greatly reducing
the peak power draw at power nodes. We then leverage
dynamic power profile reshaping to maximally utilize
the headroom unlocked by our placement framework. Our
experiments based on real production workload and power
traces show that we are able to host up to 13\% more
machines in production, without changing the underlying
power infrastructure. Utilizing the unleashed power
headroom with dynamic reshaping, we achieve up to an
estimated total of 15\% and 11\% throughput improvement
for latency-critical service and batch service
respectively at the same time, with up to 44\% of
energy slack reduction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
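
The arithmetic behind the placement insight: a node's peak is the peak of the
summed traces, which is far below the sum of individual peaks when services
peak asynchronously. A tiny numeric illustration with made-up traces:

    # Peak-power arithmetic behind the placement idea (traces made up):
    # grouping services whose peaks are asynchronous lowers the node peak.
    web   = [90, 40, 30, 85]    # peaks in slots 0 and 3
    batch = [20, 80, 85, 25]    # peaks in slots 1 and 2

    def node_peak(*services):
        return max(map(sum, zip(*services)))

    print(sum(max(s) for s in (web, batch)))   # provision for peaks: 175
    print(node_peak(web, batch))               # actual combined peak: 120
    # The 55-unit gap is fragmented budget that more servers could use.
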
@Article{Lee:2018:WPE,
author = "Jaewon Lee and Changkyu Kim and Kun Lin and Liqun
Cheng and Rama Govindaraju and Jangwoo Kim",
title = "{WSMeter}: a Performance Evaluation Methodology for
{Google}'s Production Warehouse-Scale Computers",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "549--563",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173196",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Evaluating the comprehensive performance of a
warehouse-scale computer (WSC) has been a long-standing
challenge. Traditional load-testing benchmarks become
ineffective because they cannot accurately reproduce
the behavior of thousands of distinct jobs co-located
on a WSC. We therefore evaluate WSCs using actual job
behaviors in live production environments. From our
experience of developing multiple generations of WSCs,
we identify two major challenges of this approach: (1)
the lack of a holistic metric that incorporates
thousands of jobs and summarizes the performance, and
(2) the high costs and risks of conducting an
evaluation in a live environment. To address these
challenges, we propose WSMeter, a cost-effective
methodology to accurately evaluate a WSC's performance
using a live production environment. We first define a
new metric which accurately represents a WSC's overall
performance, taking a wide variety of unevenly
distributed jobs into account. We then propose a model
to statistically embrace the performance variance
inherent in WSCs, to conduct an evaluation with minimal
costs and risks. We present three real-world use cases
to prove the effectiveness of WSMeter. In the first two
cases, WSMeter accurately discerns 7\% and 1\%
performance improvements from WSC upgrades using only
0.9\% and 6.6\% of the machines in the WSCs,
respectively. We emphasize that naive statistical
comparisons incur much higher evaluation costs ($ > 4 $
times) and sometimes even fail to distinguish subtle
differences. The third case shows that a cloud customer
hosting two services on our WSC quantifies the
performance benefits of software optimization (+9.3\%)
with minimal overheads (2.3\% of the service
capacity).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Yu:2018:DAH,
author = "Zhibin Yu and Zhendong Bei and Xuehai Qian",
title = "Datasize-Aware High Dimensional Configurations
Auto-Tuning of In-Memory Cluster Computing",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "564--577",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173187",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In-Memory cluster Computing (IMC) frameworks (e.g.,
Spark) have become increasingly important because they
typically achieve more than 10$ \times $ speedups over
the traditional On-Disk cluster Computing (ODC)
frameworks for iterative and interactive applications.
Like ODC, IMC frameworks typically run the same given
programs repeatedly on a given cluster with similar
input dataset size each time. It is challenging to
build a performance model for an IMC program because: (1)
the performance of IMC programs is more sensitive to
the size of the input dataset, which is known to be
difficult to incorporate into a performance model
due to its complex effects on performance; (2) the
number of performance-critical configuration parameters
in IMC is much larger than ODC (more than 40 vs. around
10), the high dimensionality requires more
sophisticated models to achieve high accuracy. To
address this challenge, we propose DAC, a
datasize-aware auto-tuning approach to efficiently
identify the high dimensional configuration for a given
IMC program to achieve optimal performance on a given
cluster. DAC is a significant advance over the
state-of-the-art because it can take the size of input
dataset and 41 configuration parameters as the
parameters of the performance model for a given IMC
program --- unprecedented in previous work. It is made
possible by two key techniques: (1) Hierarchical
Modeling (HM), which combines a number of individual
sub-models in a hierarchical manner; (2) Genetic
Algorithm (GA) is employed to search the optimal
configuration. To evaluate DAC, we use six typical
Spark programs, each with five different input dataset
sizes. The evaluation results show that DAC improves
the performance of these programs over the default
configurations by a factor of 30.4x on average
and up to 89x. We also report that the geometric mean
speedups of DAC over configurations by default, expert,
and RFHOC are 15.4x, 2.3x, and 1.5x, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
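
The GA component can be sketched generically: evolve candidate configurations
against a learned performance model, keeping the fittest and recombining
them. The toy three-parameter model below stands in for DAC's 41-parameter
hierarchical model and is purely illustrative.

    # Generic GA-style configuration search over a performance model.
    import random

    def perf_model(cfg, datasize):          # stand-in for the learned model
        mem, cores, par = cfg
        return datasize / (par * cores) + abs(mem - datasize / 8)

    def ga_search(datasize, pop=30, gens=40):
        rand_cfg = lambda: [random.randint(1, 64) for _ in range(3)]
        population = [rand_cfg() for _ in range(pop)]
        for _ in range(gens):
            population.sort(key=lambda c: perf_model(c, datasize))
            survivors = population[: pop // 2]          # selection
            children = []
            for _ in range(pop - len(survivors)):
                a, b = random.sample(survivors, 2)
                child = [random.choice(g) for g in zip(a, b)]   # crossover
                if random.random() < 0.2:                       # mutation
                    child[random.randrange(3)] = random.randint(1, 64)
                children.append(child)
            population = survivors + children
        return min(population, key=lambda c: perf_model(c, datasize))

    print(ga_search(datasize=256))
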
@Article{Ainsworth:2018:ETP,
author = "Sam Ainsworth and Timothy M. Jones",
title = "An Event-Triggered Programmable Prefetcher for
Irregular Workloads",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "578--592",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173189",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many modern workloads compute on large amounts of
data, often with irregular memory accesses. Current
architectures perform poorly for these workloads, as
existing prefetching techniques cannot capture the
memory access patterns; these applications end up
heavily memory-bound as a result. Although a number of
techniques exist to explicitly configure a prefetcher
with traversal patterns, gaining significant speedups,
they do not generalise beyond their target data
structures. Instead, we propose an event-triggered
programmable prefetcher combining the flexibility of a
general-purpose computational unit with an event-based
programming model, along with compiler techniques to
automatically generate events from the original source
code with annotations. This allows more complex
fetching decisions to be made, without needing to stall
when intermediate results are required. Using our
programmable prefetching system, combined with small
prefetch kernels extracted from applications, we
achieve an average 3.0x speedup in simulation for a
variety of graph, database and HPC workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
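
The irregular pattern such prefetchers target is indirection like
data[index[i]]. An event-style software model, where each completed fetch of
the index array triggers a prefetch of the pointed-to element a few
iterations ahead, can be sketched as follows (illustrative only; the paper
proposes programmable hardware events and compiler-extracted kernels):

    # Event-style model of prefetching the indirect pattern data[index[i]].
    from collections import deque

    index = [7, 2, 9, 4, 0, 5]
    data = [x * x for x in range(10)]
    LOOKAHEAD = 2

    prefetched, events = set(), deque()
    for i in range(len(index)):
        events.append(("index_loaded", i))     # event: index[i] arrived
        while events:
            kind, j = events.popleft()
            if kind == "index_loaded" and j + LOOKAHEAD < len(index):
                # the index fetch triggers a prefetch of a future target,
                # hiding the dependent load's latency
                prefetched.add(index[j + LOOKAHEAD])
        hit = index[i] in prefetched           # demand access
        print("data[index[%d]] = %d  prefetch hit: %s"
              % (i, data[index[i]], hit))
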
@Article{Zhang:2018:MLO,
author = "Dan Zhang and Xiaoyu Ma and Michael Thomson and Derek
Chiou",
title = "{Minnow}: Lightweight Offload Engines for Worklist
Management and Worklist-Directed Prefetching",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "593--607",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173197",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The importance of irregular applications such as graph
analytics is rapidly growing with the rise of Big Data.
However, parallel graph workloads tend to perform
poorly on general-purpose chip multiprocessors (CMPs)
due to poor cache locality, low compute intensity,
frequent synchronization, uneven task sizes, and
dynamic task generation. At high thread counts,
execution time is dominated by worklist synchronization
overhead and cache misses. Researchers have proposed
hardware worklist accelerators to address scheduling
costs, but these proposals often harden a specific
scheduling policy and do not address high cache miss
rates. We address this with Minnow, a technique that
augments each core in a CMP with a lightweight Minnow
accelerator. Minnow engines offload worklist scheduling
from worker threads to improve scalability. The engines
also perform worklist-directed prefetching, a technique
that exploits knowledge of upcoming tasks to issue
nearly perfectly accurate and timely prefetch
operations. On a simulated 64-core CMP running a
parallel graph benchmark suite, Minnow improves
scalability and reduces L2 cache misses from 29 to 1.2
MPKI on average, resulting in 6.01x average speedup
over an optimized software baseline for only 1\% area
overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Zhang:2018:WNA,
author = "Mingxing Zhang and Yongwei Wu and Youwei Zhuo and
Xuehai Qian and Chengying Huan and Kang Chen",
title = "{Wonderland}: a Novel Abstraction-Based Out-Of-Core
Graph Processing System",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "608--621",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173208",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many important graph applications are iterative
algorithms that repeatedly process the input graph
until convergence. For such algorithms, graph
abstraction is an important technique: although much
smaller than the original graph, it can bootstrap an
initial result that can significantly accelerate the
final convergence speed, leading to a better overall
performance. However, existing graph abstraction
                 techniques typically assume either a fully in-memory
                 or a distributed environment, which creates many
                 obstacles to applying them in an out-of-core graph
                 processing system. In this paper, we propose
Wonderland, a novel out-of-core graph processing system
based on abstraction. Wonderland has three unique
features: (1) A simple method applicable to out-of-core
systems allowing users to extract effective
abstractions from the original graph with acceptable
cost and a specific memory limit; (2)
Abstraction-enabled information propagation, where an
abstraction can be used as a bridge over the disjoint
                 on-disk graph partitions; (3) Abstraction-guided
                 priority scheduling, where an abstraction can infer a
                 better priority-based order for processing on-disk graph
partitions. Wonderland is a significant advance over
the state-of-the-art because it not only makes graph
abstraction feasible to out-of-core systems, but also
broadens the applications of the concept in important
ways. Evaluation results of Wonderland reveal that
Wonderland achieves a drastic speedup over the other
state-of-the-art systems, up to two orders of magnitude
for certain cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
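
The abstraction-bootstrapping loop described above can be stated in two
phases: converge cheaply on the small in-memory abstraction, then stream the
on-disk partitions starting from the bootstrapped values, with the
abstraction also bridging information between disjoint partitions and
suggesting a processing order. The sketch below assumes a duck-typed
algorithm object; converge, converged, sweep, and priority_order are our
invented interface, not the paper's API.

    def run_with_abstraction(abstraction, on_disk_partitions, algorithm):
        # Phase 1: iterate on the abstraction, which fits in memory, to
        # bootstrap an initial result cheaply.
        values = algorithm.converge(abstraction)
        # Phase 2: refine over the on-disk partitions; the abstraction
        # guides the order and carries values across partitions.
        while not algorithm.converged(values):
            for part in algorithm.priority_order(on_disk_partitions, values):
                values = algorithm.sweep(part, values)
        return values
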
@Article{Sabet:2018:TTI,
author = "Amir Hossein Nodehi Sabet and Junqiao Qiu and Zhijia
Zhao",
title = "{Tigr}: Transforming Irregular Graphs for
{GPU}-Friendly Graph Processing",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "622--636",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173180",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Graph analytics delivers deep knowledge by processing
large volumes of highly connected data. In real-world
graphs, the degree distribution tends to follow the
power law --- a small portion of nodes own a large
number of neighbors. The high irregularity of degree
distribution acts as a major barrier to their efficient
processing on GPU architectures, which are primarily
designed for accelerating computations on regular data
with SIMD executions. Existing solutions to the
inefficiency of GPU-based graph analytics either modify
the graph programming abstraction or rely on changes to
the low-level thread execution models. The former
                 requires more programming effort for designing and
                 maintaining graph analytics, while the latter is coupled
                 to the underlying architectures, making it difficult
to adapt as architectures quickly evolve. Unlike prior
efforts, this work proposes to address the above
fundamental problem at its origin --- the irregular
graph data itself. It raises a critical question in
irregular graph processing: Is it possible to transform
irregular graphs into more regular ones such that the
graphs can be processed more efficiently on GPU-like
                 architectures, yet still produce the same results?
Inspired by the question, this work introduces Tigr ---
a graph transformation framework that can effectively
reduce the irregularity of real-world graphs with
correctness guarantees for a wide range of graph
analytics. To make the transformations practical, Tigr
features a lightweight virtual transformation scheme,
which can substantially reduce the costs of graph
transformations, while preserving the benefits of
reduced irregularity. Evaluation on Tigr-based GPU
graph processing shows significant and consistent
speedup over the state-of-the-art GPU graph processing
frameworks for a spectrum of irregular graphs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
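
The Tigr abstract leaves the transformation itself at a high level; one
standard way to regularize a power-law graph, consistent with its
description, is to split each high-degree node into virtual nodes of bounded
degree. The sketch below is our illustration of degree-capped splitting, not
code from the paper; a faithful scheme must additionally connect (or
iteratively reconcile) a node's virtual copies so results match the original
graph, which Tigr does virtually rather than by materializing the split
graph.

    def split_high_degree(adj, max_degree):
        """Split nodes whose degree exceeds max_degree into virtual nodes.

        adj: dict mapping integer node id -> list of neighbors (assumed).
        Returns (new_adj, origin), where origin maps each virtual node
        back to its physical node so per-node results can be merged."""
        new_adj, origin = {}, {}
        next_id = max(adj) + 1
        for node, nbrs in adj.items():
            chunks = [nbrs[i:i + max_degree]
                      for i in range(0, len(nbrs), max_degree)] or [[]]
            ids = [node] + list(range(next_id, next_id + len(chunks) - 1))
            next_id += len(chunks) - 1
            for vid, chunk in zip(ids, chunks):
                new_adj[vid] = list(chunk)
                origin[vid] = node
        return new_adj, origin
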
@Article{Haria:2018:DMH,
author = "Swapnil Haria and Mark D. Hill and Michael M. Swift",
title = "Devirtualizing Memory in Heterogeneous Systems",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "637--650",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173194",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Accelerators are increasingly recognized as one of the
major drivers of future computational growth. For
accelerators, shared virtual memory (VM) promises to
simplify programming and provide safe data sharing with
CPUs. Unfortunately, the overheads of virtual memory,
which are high for general-purpose processors, are even
higher for accelerators. Providing accelerators with
                 direct access to physical memory (PM), in contrast,
provides high performance but is both unsafe and more
difficult to program. We propose Devirtualized Memory
(DVM) to combine the protection of VM with direct
access to PM. By allocating memory such that physical
and virtual addresses are almost always identical
(VA==PA), DVM mostly replaces page-level address
translation with faster region-level Devirtualized
Access Validation (DAV). Optionally on read accesses,
DAV can be overlapped with data fetch to hide VM
overheads. DVM requires modest OS and IOMMU changes,
and is transparent to the application. Implemented in
Linux 4.10, DVM reduces VM overheads in a
graph-processing accelerator to just 1.6\% on average.
DVM also improves performance by 2.1X over an optimized
conventional VM implementation, while consuming 3.9X
less dynamic energy for memory management. We further
discuss DVM's potential to extend beyond accelerators
to CPUs, where it reduces VM overheads to 5\% on
average, down from 29\% for conventional VM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
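
The key mechanism in the DVM abstract, replacing per-page translation with
region-level validation whenever VA==PA, can be mimicked in a toy memory
model: an address falling inside a region the process owns is used unchanged,
and only the rare non-identity mappings fall back to a page table. This is a
schematic sketch over our own simplified data structures, not the OS/IOMMU
design itself.

    import bisect

    class DevirtualizedSpace:
        def __init__(self):
            self.regions = []     # sorted (start, end, perms), identity-mapped
            self.page_table = {}  # fallback VA page -> PA page for exceptions

        def add_region(self, start, end, perms):
            bisect.insort(self.regions, (start, end, perms))

        def access(self, va, want):
            """Devirtualized Access Validation: check the region, keep the
            address; fall back to page-level translation only on a miss."""
            i = bisect.bisect_right(self.regions, (va, float("inf"), "")) - 1
            if i >= 0:
                start, end, perms = self.regions[i]
                if va < end and want in perms:
                    return va                     # PA == VA, no translation
            page, offset = va >> 12, va & 0xFFF   # conventional slow path
            if page not in self.page_table:
                raise MemoryError("fault at %#x" % va)
            return (self.page_table[page] << 12) | offset
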
@Article{Kumar:2018:LLT,
  author =       "Mohan Kumar and Steffen Maass and Sanidhya
Kashyap and J{\'a}n Vesel{\'y} and Zi Yan and Taesoo
Kim and Abhishek Bhattacharjee and Tushar Krishna",
title = "{LATR}: Lazy Translation Coherence",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "651--664",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173198",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "We propose LATR (lazy TLB coherence), a software-based
TLB shootdown mechanism that can alleviate the overhead
of the synchronous TLB shootdown mechanism in existing
operating systems. By handling the TLB coherence in a
lazy fashion, LATR can avoid expensive IPIs which are
required for delivering a shootdown signal to remote
cores, and the performance overhead of associated
interrupt handlers. Therefore, virtual memory
operations, such as free and page migration operations,
can benefit significantly from LATR's mechanism. For
example, LATR improves the latency of munmap() by
70.8\% on a 2-socket machine, a widely used
configuration in modern data centers. Real-world,
performance-critical applications such as web servers
can also benefit from LATR: without any
application-level changes, LATR improves Apache by
59.9\% compared to Linux, and by 37.9\% compared to
ABIS, a highly optimized, state-of-the-art TLB
coherence technique.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
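
The lazy scheme in the LATR abstract can be paraphrased in code: instead of
interrupting remote cores with IPIs, the initiating core records stale
mappings in per-core queues, and each core applies its queue at its next
natural opportunity (a context switch or timer tick). The sketch uses our own
names; the real design must also delay reuse of the freed physical page until
every affected core has swept its queue.

    from collections import defaultdict

    class LazyTLBCoherence:
        def __init__(self):
            self.pending = defaultdict(list)   # core id -> stale virtual pages

        def unmap(self, vpage, cores_that_may_cache_it):
            # A synchronous design would IPI every core here and wait; the
            # lazy design merely records the stale mapping for each core.
            for core in cores_that_may_cache_it:
                self.pending[core].append(vpage)

        def local_sweep(self, core, tlb):
            # Runs on each core at its next context switch or timer tick,
            # applying queued invalidations without cross-core interrupts.
            for vpage in self.pending.pop(core, []):
                tlb.discard(vpage)             # tlb modeled as a set of vpages
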
@Article{Taassori:2018:VRP,
author = "Meysam Taassori and Ali Shafiee and Rajeev
Balasubramonian",
title = "{VAULT}: Reducing Paging Overheads in {SGX} with
Efficient Integrity Verification Structures",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "665--678",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177155",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Intel's SGX offers state-of-the-art security features,
including confidentiality, integrity, and
authentication (CIA) when accessing sensitive pages in
memory. Sensitive pages are placed in an Enclave Page
Cache (EPC) within the physical memory before they can
be accessed by the processor. To control the overheads
imposed by CIA guarantees, the EPC operates with a
limited capacity (currently 128 MB). Because of this
limited EPC size, sensitive pages must be frequently
swapped between EPC and non-EPC regions in memory. A
page swap is expensive (about 40K cycles) because it
requires an OS system call, page copying, updates to
integrity trees and metadata, etc. Our analysis shows
that the paging overhead can slow the system on average
by 5$ \times $, and other studies have reported even
higher slowdowns for memory-intensive workloads. The
paging overhead can be reduced by growing the size of
the EPC to match the size of physical memory, while
allowing the EPC to also accommodate non-sensitive
pages. However, at least two important problems must be
addressed to enable this growth in EPC: (i) the depth
of the integrity tree and its cacheability must be
improved to keep memory bandwidth overheads in check,
(ii) the space overheads of integrity verification
(tree and MACs) must be reduced. We achieve both goals
by introducing a variable arity unified tree (VAULT)
organization that is more compact and has lower depth.
We further reduce the space overheads with techniques
that combine MAC sharing and compression. With
simulations, we show that the combination of our
techniques can address most inefficiencies in SGX
memory access and improve overall performance by 3.7$
\times $, relative to an SGX baseline, while incurring
                 a memory capacity overhead of only 4.7\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
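
The depth reduction behind VAULT follows from simple arity arithmetic: an
integrity tree whose nodes each cover k children needs about log_k(N) levels
over N protected blocks, so packing more counters per node (higher effective
arity) shrinks the depth and with it the metadata accesses per verification.
A small worked computation with illustrative numbers, not the paper's exact
parameters:

    def tree_depth(num_blocks, arity):
        """Levels needed for an integrity tree with the given node arity."""
        depth, covered = 0, 1
        while covered < num_blocks:
            covered *= arity
            depth += 1
        return depth

    blocks = (16 * 2**30) // 64      # 16 GiB protected, 64-byte blocks
    for arity in (8, 16, 32, 64):
        print("arity %2d -> depth %d" % (arity, tree_depth(blocks, arity)))
    # Prints depths 10, 7, 6, 5: higher arity => shallower tree => fewer
    # metadata accesses per integrity check.
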
@Article{Panwar:2018:MHP,
author = "Ashish Panwar and Aravinda Prasad and K. Gopinath",
title = "Making Huge Pages Actually Useful",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "679--692",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173203",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The virtual-to-physical address translation overhead,
a major performance bottleneck for modern workloads,
can be effectively alleviated with huge pages. However,
since huge pages must be mapped contiguously, OSs have
not been able to use them well because of the memory
fragmentation problem despite hardware support for huge
pages being available for nearly two decades. This
paper presents a comprehensive study of the interaction
of fragmentation with huge pages in the Linux kernel.
We observe that when huge pages are used, problems such
as high CPU utilization and latency spikes occur
because of unnecessary work (e.g., useless page
migration) performed by memory management related
subsystems due to the poor handling of unmovable (i.e.,
kernel) pages. This behavior is even more harmful in
virtualized systems where unnecessary work may be
performed in both guest and host OSs. We present
Illuminator, an efficient memory manager that provides
various subsystems, such as the page allocator, the
ability to track all unmovable pages. It allows
subsystems to make informed decisions and eliminate
unnecessary work which in turn leads to cost-effective
huge page allocations. Illuminator reduces the cost of
compaction (up to 99\%), improves application
performance (up to 2.3x) and reduces the maximum
latency of MySQL database server (by 30x). Importantly,
this work shows the effectiveness of a simple solution
for long-standing huge page related problems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Evtyushkin:2018:BNS,
  author =       "Dmitry Evtyushkin and Ryan Riley and Nael
                 Abu-Ghazaleh and Dmitry Ponomarev",
title = "{BranchScope}: a New Side-Channel Attack on
Directional Branch Predictor",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "693--707",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173204",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present BranchScope --- a new side-channel attack
where the attacker infers the direction of an arbitrary
conditional branch instruction in a victim program by
manipulating the shared directional branch predictor.
The directional component of the branch predictor
stores the prediction on a given branch (taken or
not-taken) and is a different component from the branch
target buffer (BTB) attacked by previous work.
BranchScope is the first fine-grained attack on the
directional branch predictor, expanding our
understanding of the side channel vulnerability of the
branch prediction unit. Our attack targets complex
hybrid branch predictors with unknown organization. We
demonstrate how an attacker can force these predictors
to switch to a simple 1-level mode to simplify the
direction recovery. We carry out BranchScope on several
recent Intel CPUs and also demonstrate the attack
against an SGX enclave.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Dickens:2018:SCI,
author = "Bernard {Dickens III} and Haryadi S. Gunawi and Ariel
J. Feldman and Henry Hoffmann",
title = "{StrongBox}: Confidentiality, Integrity, and
Performance using Stream Ciphers for Full Drive
Encryption",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "708--721",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173183",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Full-drive encryption (FDE) is especially important
for mobile devices because they contain large
quantities of sensitive data yet are easily lost or
                 stolen. Unfortunately, the standard approach to FDE
                 (the AES block cipher in XTS mode) is 3--5$ \times $ slower
than unencrypted storage. Authenticated encryption
based on stream ciphers is already used as a faster
alternative to AES in other contexts, such as HTTPS,
but the conventional wisdom is that stream ciphers are
unsuitable for FDE. Used naively in drive encryption,
stream ciphers are vulnerable to attacks, and
mitigating these attacks with on-drive metadata is
generally believed to ruin performance. In this paper,
we argue that recent developments in mobile hardware
invalidate this assumption, making it possible to use
fast stream ciphers for FDE. Modern mobile devices
employ solid-state storage with Flash Translation
Layers (FTL), which operate similarly to Log-structured
File Systems (LFS). They also include trusted hardware
such as Trusted Execution Environments (TEEs) and
secure storage areas. Leveraging these two trends, we
propose StrongBox, a stream cipher-based FDE layer that
is a drop-in replacement for dm-crypt, the standard
Linux FDE module based on AES-XTS. StrongBox introduces
a system design and on-drive data structures that
exploit LFS's lack of overwrites to avoid costly
rekeying and a counter stored in trusted hardware to
protect against attacks. We implement StrongBox on an
ARM big.LITTLE mobile processor and test its
performance under multiple popular production LFSes. We
find that StrongBox improves read performance by as
much as 2.36$ \times $ (1.72$ \times $ on average)
while offering stronger integrity guarantees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
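
The crux of the StrongBox design as the abstract presents it: a
log-structured layer never overwrites a logical block in place, so a stream
cipher becomes safe if each write derives its keystream from a (block id,
write version) pair that never repeats, with the version counter kept in
trusted hardware to defeat rollback. The sketch below models only the nonce
discipline; cipher.keystream is an assumed interface, not a real library
call.

    class StreamCipherFDE:
        def __init__(self, cipher):
            self.cipher = cipher   # assumed: cipher.keystream(nonce, n) -> bytes
            self.version = {}      # block id -> monotonically increasing counter

        def encrypt_block(self, block_id, plaintext):
            # A fresh (block_id, version) nonce per write keeps the keystream
            # unique without rekeying; persisting the counters in trusted
            # hardware is what protects against rollback.
            v = self.version.get(block_id, 0) + 1
            self.version[block_id] = v
            ks = self.cipher.keystream((block_id, v), len(plaintext))
            return bytes(p ^ k for p, k in zip(plaintext, ks)), v
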
@Article{Hunger:2018:DDC,
author = "Casen Hunger and Lluis Vilanova and Charalampos
Papamanthou and Yoav Etsion and Mohit Tiwari",
title = "{DATS} --- Data Containers for {Web} Applications",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "722--736",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173213",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Data containers enable users to control access to
their data while untrusted applications compute on it.
However, they require replicating an application inside
each container --- compromising functionality,
programmability, and performance. We propose DATS --- a
system to run web applications that retains application
usability and efficiency through a mix of hardware
capability enhanced containers and the introduction of
two new primitives modeled after the popular
model-view-controller (MVC) pattern. (1) DATS
introduces a templating language to create views that
compose data across data containers. (2) DATS uses
authenticated storage and confinement to enable an
untrusted storage service, such as memcached and
deduplication, to operate on plain-text data across
containers. These two primitives act as robust
declassifiers that allow DATS to enforce
non-interference across containers, taking large
applications out of the trusted computing base (TCB).
We showcase eight different web applications including
Gitlab and a Slack-like chat, significantly improve the
worst-case overheads due to application replication,
and demonstrate usable performance for common-case
usage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Mallon:2018:DPP,
author = "Stephen Mallon and Vincent Gramoli and Guillaume
Jourjon",
title = "{DLibOS}: Performance and Protection with a
Network-on-Chip",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "737--750",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173209",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A long body of research work has led to the conjecture
that highly efficient IO processing at user-level would
necessarily violate protection. In this paper, we
                 debunk this myth by introducing DLibOS, a new paradigm
that consists of distributing a library OS on
specialized cores to achieve performance and protection
at the user-level. Its main novelty consists of
leveraging network-on-chip to allow hardware message
passing, rather than context switches, for
communication between different address spaces. To
demonstrate the feasibility of our approach, we
implement a driver and a network stack at user-level on
a Tilera many-core machine. We define a novel
asynchronous socket interface and partition the memory
such that the reception, the transmission and the
application modify isolated regions. Our high
performance results of 4.2 and 3.1 million requests per
                 second, obtained on a webserver and the Memcached
                 application respectively, confirm the relevance of
our design decisions. Finally, we compare DLibOS
against a non-protected user-level network stack and
show that protection comes at a negligible cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Lin:2018:AIA,
author = "Shih-Chieh Lin and Yunqi Zhang and Chang-Hong Hsu and
Matt Skach and Md E. Haque and Lingjia Tang and Jason
Mars",
title = "The Architectural Implications of Autonomous Driving:
Constraints and Acceleration",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "751--766",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173191",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Autonomous driving systems have attracted a
significant amount of interest recently, and many
industry leaders, such as Google, Uber, Tesla, and
Mobileye, have invested a large amount of capital and
engineering power on developing such systems. Building
autonomous driving systems is particularly challenging
due to stringent performance requirements in terms of
                 both making safe operational decisions and
                 finishing processing in real time. Despite the recent
advancements in technology, such systems are still
largely under experimentation and architecting
end-to-end autonomous driving systems remains an open
research question. To investigate this question, we
first present and formalize the design constraints for
building an autonomous driving system in terms of
performance, predictability, storage, thermal and
power. We then build an end-to-end autonomous driving
system using state-of-the-art award-winning algorithms
to understand the design trade-offs for building such
systems. In our real-system characterization, we
identify three computational bottlenecks, which
conventional multicore CPUs are incapable of processing
under the identified design constraints. To meet these
constraints, we accelerate these algorithms using three
accelerator platforms including GPUs, FPGAs, and ASICs,
which can reduce the tail latency of the system by
169x, 10x, and 93x respectively. With accelerator-based
designs, we are able to build an end-to-end autonomous
driving system that meets all the design constraints,
and explore the trade-offs among performance, power and
the higher accuracy enabled by higher resolution
cameras.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Colin:2018:RES,
author = "Alexei Colin and Emily Ruppel and Brandon Lucia",
title = "A Reconfigurable Energy Storage Architecture for
Energy-harvesting Devices",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "767--781",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173210",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Battery-free, energy-harvesting devices operate using
energy collected exclusively from their environment.
Energy-harvesting devices allow maintenance-free
                 deployment in extreme environments, but require a
power system to provide the right amount of energy when
an application needs it. Existing systems must
provision energy capacity statically based on an
application's peak demand which compromises efficiency
and responsiveness when not at peak demand. This work
presents Capybara: a co-designed hardware/software
power system with dynamically reconfigurable energy
storage capacity that meets varied application energy
demand. The Capybara software interface allows
programmers to specify the energy mode of an
application task. Capybara's runtime system
reconfigures Capybara's hardware energy capacity to
match application demand. Capybara also allows a
programmer to write reactive application tasks that
                 pre-allocate a burst of energy that they can spend in
response to an asynchronous (e.g., external) event. We
instantiated Capybara's hardware design in two EH
devices and implemented three reactive sensing
applications using its software interface. Capybara
improves event detection accuracy by 2x-4x over
statically-provisioned energy capacity, maintains
response latency within 1.5x of a continuously-powered
baseline, and enables reactive applications that are
intractable with existing power systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Ma:2018:NNE,
author = "Kaisheng Ma and Xueqing Li and Mahmut Taylan Kandemir
and Jack Sampson and Vijaykrishnan Narayanan and
Jinyang Li and Tongda Wu and Zhibo Wang and Yongpan Liu
and Yuan Xie",
title = "{NEOFog}: Nonvolatility-Exploiting Optimizations for
Fog Computing",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "782--796",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3177154",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Nonvolatile processors have emerged as one of the
promising solutions for energy harvesting scenarios,
among which Wireless Sensor Networks (WSN) provide some
of the most important applications. In a typical
                 distributed sensing system, due to differences in
                 location, energy-harvester angles, power sources, etc.,
                 different nodes may have different amounts of energy
                 ready for use. While prior approaches have examined
these challenges, they have not done so in the context
of the features offered by nonvolatile computing
approaches, which disrupt certain foundational
assumptions. We propose a new set of
nonvolatility-exploiting optimizations and embody them
in the NEOFog system architecture. We discuss shifts in
the tradeoffs in data and program distribution for
nonvolatile processing-based WSNs, showing how
non-volatile processing and non-volatile RF support
alter the benefits of computation and
communication-centric approaches. We also propose a new
algorithm specific to nonvolatile sensing systems for
load balancing both computation and communication
demands. Collectively, the NV-aware optimizations in
NEOFog increase the ability to perform in-fog
processing by 4.2X and can increase this to 8X if
virtualized nodes are 3X multiplexed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Lottarini:2018:VBV,
author = "Andrea Lottarini and Alex Ramirez and Joel Coburn and
Martha A. Kim and Parthasarathy Ranganathan and Daniel
Stodolsky and Mark Wachsler",
title = "{\tt vbench}: Benchmarking Video Transcoding in the
Cloud",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "797--809",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173207",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents vbench, a publicly available
benchmark for cloud video services. We are the first
study, to the best of our knowledge, to characterize
the emerging video-as-a-service workload. Unlike prior
video processing benchmarks, vbench's videos are
algorithmically selected to represent a large
commercial corpus of millions of videos. Reflecting the
complex infrastructure that processes and hosts these
videos, vbench includes carefully constructed metrics
and baselines. The combination of validated corpus,
baselines, and metrics reveal nuanced tradeoffs between
speed, quality, and compression. We demonstrate the
importance of video selection with a microarchitectural
study of cache, branch, and SIMD behavior. vbench
reveals trends from the commercial corpus that are not
visible in other video corpuses. Our experiments with
GPUs under vbench's scoring scenarios reveal that
context is critical: GPUs are well suited for
                 live-streaming, while video-on-demand shifts costs
from compute to storage and network.
Counterintuitively, they are not viable for popular
videos, for which highly compressed, high quality
copies are required. We instead find that popular
videos are currently well-served by the current
trajectory of software encoders.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Blackburn:2018:SDS,
author = "Steve Blackburn",
title = "Session details: Session 7B: Memory 2",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252965",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Carter:2018:SDS,
author = "John Carter",
title = "Session details: Session 6B: Datacenters",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252963",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Criswell:2018:SDS,
author = "John Criswell",
title = "Session details: Session 8A: Security and Protection",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252966",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Gandhi:2018:SDS,
author = "Jayneel Gandhi",
title = "Session details: Session 6A: {GPU} 2",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252962",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Hoffmann:2018:SDS,
author = "Hank Hoffmann",
title = "Session details: Session 5A: Concurrency and
Parallelism",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252960",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Kim:2018:SDS,
author = "Martha Kim",
title = "Session details: Session 7A: Irregular Apps and
Graphs",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252964",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Larus:2018:SDS,
author = "James Larus",
title = "Session details: Session 2B: Performance Management",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252955",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Lee:2018:SDS,
author = "Dongyoon Lee",
title = "Session details: Session 3B: Mobile Applications",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252957",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Liu:2018:SDS,
author = "Lei Liu",
title = "Session details: Session 1B: Managed Runtimes and
Dynamic Translation",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252953",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Lu:2018:SDS,
author = "Shan Lu",
title = "Session details: Session 4B: Program Analysis",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252959",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Narayanasamy:2018:SDS,
author = "Satish Narayanasamy",
title = "Session details: Session 3A: Programmable Devices and
Co-processors",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252956",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Rossbach:2018:SDS,
author = "Christopher J. Rossbach",
title = "Session details: Session 2A: {GPUs} 1",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252954",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Sampson:2018:SDS,
  author =       "Adrian Sampson",
  title =        "Session details: Session 5B: Neural Networks",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252961",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Solihin:2018:SDS,
author = "Yan Solihin",
title = "Session details: Session 8B: Potpourri",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252967",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Torrellas:2018:SDS,
author = "Josep Torrellas",
title = "Session details: Session 1A: New Architectures",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252952",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Tsafrir:2018:SDS,
author = "Dan Tsafrir",
title = "Session details: Session 4A: Memory 1",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "??--??",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3252958",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ASPLOS '18 proceedings.",
}
@Article{Wang:2018:HSA,
author = "Kunshan Wang and Stephen M. Blackburn and Antony L.
Hosking and Michael Norrish",
title = "Hop, Skip, \& Jump: Practical On-Stack Replacement for
a Cross-Platform Language-Neutral {VM}",
journal = j-SIGPLAN,
volume = "53",
number = "3",
pages = "1--16",
month = mar,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296975.3186412",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "On-stack replacement (OSR) is a performance-critical
technology for many languages, especially dynamic
languages. Conventional wisdom, apparent in JavaScript
engines such as V8 and SpiderMonkey, is that OSR must
                 be implemented in a low-level (i.e., assembly) and
language-specific way. This paper presents an OSR
abstraction based on Swapstack, materialized as the API
for a low-level virtual machine, and shows how the
abstraction of resumption protocols facilitates an
elegant implementation of this API on real hardware.
Using an experimental JavaScript implementation, we
demonstrate that this API enables the language
implementation to perform OSR without the need to deal
with machine-level details. We also show that the API
itself is implementable on concrete hardware. This work
helps crystallize OSR abstractions and, by providing a
reusable implementation, brings OSR within reach for
more language implementers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '18 proceedings.",
}
@Article{Wang:2018:IDG,
author = "Wenwen Wang and Jiacheng Wu and Xiaoli Gong and Tao Li
and Pen-Chung Yew",
title = "Improving Dynamically-Generated Code Performance on
Dynamic Binary Translators",
journal = j-SIGPLAN,
volume = "53",
number = "3",
pages = "17--30",
month = mar,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296975.3186413",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The recent transition in the software industry toward
dynamically generated code poses a new challenge to
existing dynamic binary translation (DBT) systems. A
significant re-translation overhead could be introduced
due to the maintenance of the consistency between the
dynamically-generated guest code and the corresponding
translated host code. To address this issue, this paper
presents a novel approach to optimize DBT systems for
guest applications with dynamically-generated code. The
proposed approach can maximize the reuse of previously
translated host code to mitigate the re-translation
overhead. A prototype based on such an approach has
been implemented on an existing DBT system HQEMU.
Experimental results on a set of JavaScript
applications show that it can achieve a 1.24X
performance speedup on average compared to the original
HQEMU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '18 proceedings.",
}
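
The reuse strategy in the preceding abstract can be approximated by keying
the translation cache on the content of a guest code block rather than its
address, so a regenerated block that is byte-identical at a new location can
reuse the earlier translation. This is our own simplification; real
dynamically generated code often embeds absolute addresses, which a
production DBT must additionally patch or abstract.

    import hashlib

    class TranslationCache:
        def __init__(self):
            self.by_content = {}   # hash of guest bytes -> translated host code

        def translate(self, guest_bytes, do_translate):
            key = hashlib.sha1(guest_bytes).digest()
            host = self.by_content.get(key)
            if host is None:                   # miss: pay translation cost once
                host = do_translate(guest_bytes)
                self.by_content[key] = host
            return host                        # regenerated code hits here
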
@Article{Ma:2018:GEG,
author = "Jiacheng Ma and Xiao Zheng and Yaozu Dong and Wentai
Li and Zhengwei Qi and Bingsheng He and Haibing Guan",
title = "{gMig}: Efficient {GPU} Live Migration Optimized by
Software Dirty Page for Full Virtualization",
journal = j-SIGPLAN,
volume = "53",
number = "3",
pages = "31--44",
month = mar,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296975.3186414",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "This paper introduces gMig, an open-source and
practical GPU live migration solution for full
virtualization. By taking advantage of the dirty
pattern of GPU workloads, gMig presents the One-Shot
                 Pre-Copy combined with the hashing-based Software Dirty
Page technique to achieve efficient GPU live migration.
Particularly, we propose three approaches for gMig: (1)
Dynamic Graphics Address Remapping, which parses and
manipulates GPU commands to adjust the address mapping
to adapt to a different environment after migration,
                 (2) Software Dirty Page, which utilizes a hashing-based
approach to detect page modification, overcomes the
commodity GPU's hardware limitation, and speeds up the
migration by only sending the dirtied pages, (3)
One-Shot Pre-Copy, which greatly reduces the rounds of
pre-copy of graphics memory. Our evaluation shows that
gMig achieves GPU live migration with an average
downtime of 302 ms on Windows and 119 ms on Linux. With
the help of Software Dirty Page, the number of GPU
pages transferred during the downtime is effectively
reduced by 80.0\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '18 proceedings.",
}
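
The Software Dirty Page technique, hashing graphics pages to detect
modification where the GPU exposes no hardware dirty bits, is straightforward
to sketch: hash every page at the start of a copy round and, in the next
round, resend only the pages whose hashes changed. The code below is a
schematic software analogue; the page size and hash choice are ours.

    import hashlib

    PAGE_SIZE = 4096

    def page_hashes(memory):
        """memory: a bytes-like snapshot of graphics memory."""
        return [hashlib.blake2b(memory[off:off + PAGE_SIZE],
                                digest_size=8).digest()
                for off in range(0, len(memory), PAGE_SIZE)]

    def dirty_pages(old_hashes, memory):
        """Indices of pages modified since the previous copy round."""
        return [i for i, h in enumerate(page_hashes(memory))
                if h != old_hashes[i]]
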
@Article{Ruprecht:2018:VLM,
author = "Adam Ruprecht and Danny Jones and Dmitry Shiraev and
Greg Harmon and Maya Spivak and Michael Krebs and Miche
Baker-Harvey and Tyler Sanderson",
title = "{VM} Live Migration At Scale",
journal = j-SIGPLAN,
volume = "53",
number = "3",
pages = "45--56",
month = mar,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296975.3186415",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Uninterrupted uptime is a critical aspect of Virtual
Machines (VMs) offered by cloud hosting providers.
Google's VMs run on top of rapidly changing
infrastructure: we regularly update hardware and host
software, and we must quickly respond to failing
hardware. Frequent change is critical to both
development velocity---deploying new versions of
services and infrastructure---and the ability to
respond rapidly to defects, including critical security
fixes. Typically these updates would be disruptive,
resulting in VM termination or restart. In this paper
we present how we use VM live migration at scale to
eliminate this disruption with minimal impact to the
                 guest, performing over 1,000,000 migrations monthly in
our production fleet, with 50ms median blackout, 300ms
99th percentile blackout.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '18 proceedings.",
}
@Article{Xu:2018:DES,
author = "Yu Xu and Jianguo Yao and Yaozu Dong and Kun Tian and
Xiao Zheng and Haibing Guan",
title = "{Demon}: an Efficient Solution for on-Device {MMU}
Virtualization in Mediated Pass-Through",
journal = j-SIGPLAN,
volume = "53",
number = "3",
pages = "57--70",
month = mar,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296975.3186416",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Memory Management Units (MMUs) for on-device address
translation are widely used in modern devices. However,
conventional solutions for on-device MMU
virtualization, such as shadow page table implemented
in mediated pass-through, still suffer from high
complexity and low performance. We present Demon, an
efficient solution for on-DEvice MMU virtualizatiON in
mediated pass-through. The key insight is that Demon
takes advantage of IOMMU to construct a two-dimensional
address translation and dynamically switches the
2nd-dimensional page table to a proper candidate when
the device owner switches. In order to support
fine-grained parallelism for the device with multiple
engines, we put forward a hardware proposal that
separates the address space of each engine and enables
simultaneous device address remapping for multiple
virtual machines (VMs). We implement Demon with a
prototype named gDemon which virtualizes Intel GPU MMU.
Nonetheless, Demon is not limited to this particular
case. Evaluations show that gDemon provides up to
19.73x better performance in the media transcoding
                 workloads and achieves a performance improvement of up to
17.09\% and 13.73\% in the 2D benchmarks and 3D
benchmarks, respectively, compared with gVirt. The
current release of gDemon scales up to 6 VMs with
moderate performance in our experiments. In addition,
gDemon simplifies the implementation of GPU MMU
virtualization with 37\% code reduction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '18 proceedings.",
}
@Article{Yan:2018:FPS,
author = "Qiuchen Yan and Stephen McCamant",
title = "{Fast PokeEMU}: Scaling Generated Instruction Tests
Using Aggregation and State Chaining",
journal = j-SIGPLAN,
volume = "53",
number = "3",
pages = "71--83",
month = mar,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296975.3186417",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Software that emulates a CPU has many applications,
but is difficult to implement correctly and requires
extensive testing. Since a large number of test cases
are required for full coverage, it is important that
the tests execute efficiently. We explore techniques
for combining many instruction tests into one program
to amortize overheads such as booting an emulator. To
ensure the results of each test are reflected in a
final result, we use the outputs of one instruction
test as an input to the next, and adopt the ``Feistel
network'' construction from cryptography so that each
step is invertible. We evaluate this approach by
applying it to PokeEMU, a tool that generates emulator
tests using symbolic execution. The combined tests run
much faster, but still reveal most of the same behavior
differences as when run individually.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '18 proceedings.",
}
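
The Feistel-network chaining mentioned above is what makes each aggregation
step invertible: the accumulated state is split into halves and each test's
output enters through a round function, so no test's contribution can cancel
out and a divergence anywhere propagates to the final state. A minimal sketch
with an invented round function:

    def feistel_step(left, right, test_output, bits=64):
        """(L, R) -> (R, L xor F(R, out)).  Given the new pair and `out`,
        the old pair is recoverable, so no test's result is ever lost."""
        mask = (1 << bits) - 1
        f = ((right * 0x9E3779B97F4A7C15) ^ test_output) & mask  # toy round fn
        return right, (left ^ f) & mask

    def chain(tests, run_test):
        left = right = 0
        for t in tests:
            left, right = feistel_step(left, right, run_test(t))
        return left, right   # final state reflects every test's output
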
@Article{Rigger:2018:AXI,
author = "Manuel Rigger and Stefan Marr and Stephen Kell and
David Leopoldseder and Hanspeter M{\"o}ssenb{\"o}ck",
title = "An Analysis of x86-64 Inline Assembly in {C}
Programs",
journal = j-SIGPLAN,
volume = "53",
number = "3",
pages = "84--99",
month = mar,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296975.3186418",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "C codebases frequently embed nonportable and
unstandardized elements such as inline assembly code.
Such elements are not well understood, which poses a
problem to tool developers who aspire to support C
code. This paper investigates the use of x86-64 inline
assembly in 1264 C projects from GitHub and combines
qualitative and quantitative analyses to answer
questions that tool authors may have. We found that
28.1\% of the most popular projects contain inline
assembly code, although the majority contain only a few
fragments with just one or two instructions. The most
popular instructions constitute a small subset
concerned largely with multicore semantics, performance
optimization, and hardware control. Our findings are
intended to help developers of C-focused tools, those
testing compilers, and language designers seeking to
reduce the reliance on inline assembly. They may also
aid the design of tools focused on inline assembly
itself.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "VEE '18 proceedings.",
}
@Article{Panchekha:2018:VWP,
author = "Pavel Panchekha and Adam T. Geller and Michael D.
Ernst and Zachary Tatlock and Shoaib Kamil",
title = "Verifying that web pages have accessible layout",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "1--14",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192407",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Usability and accessibility guidelines aim to make
graphical user interfaces accessible to all users, by,
say, requiring that text is sufficiently large,
interactive controls are visible, and heading size
corresponds to importance. These guidelines must hold
on the infinitely many possible renderings of a web
page generated by differing screen sizes, fonts, and
other user preferences. Today, these guidelines are
tested by manual inspection of a few renderings,
because (1) the guidelines are not expressed in a
formal language, (2) the semantics of browser rendering
are not well understood, and (3) no tools exist to
check all possible renderings of a web page. VizAssert
solves these problems. First, it introduces visual
logic to precisely specify accessibility properties.
Second, it formalizes a large fragment of the browser
rendering algorithm using novel finitization
reductions. Third, it provides a sound, automated tool
for verifying assertions in visual logic. We encoded 14
assertions drawn from best-practice accessibility and
mobile-usability guidelines in visual logic. VizAssert
                 checked them on 62 professionally designed web
pages. It found 64 distinct errors in the web pages,
while reporting only 13 false positive warnings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Vilk:2018:BAD,
author = "John Vilk and Emery D. Berger",
title = "{BLeak}: automatically debugging memory leaks in web
applications",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "15--29",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192376",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Despite the presence of garbage collection in managed
languages like JavaScript, memory leaks remain a
serious problem. In the context of web applications,
these leaks are especially pervasive and difficult to
debug. Web application memory leaks can take many
forms, including failing to dispose of unneeded event
listeners, repeatedly injecting iframes and CSS files,
and failing to call cleanup routines in third-party
libraries. Leaks degrade responsiveness by increasing
GC frequency and overhead, and can even lead to browser
tab crashes by exhausting available memory. Because
previous leak detection approaches designed for
conventional C, C++ or Java applications are
ineffective in the browser environment, tracking down
leaks currently requires intensive manual effort by web
developers. This paper introduces BLeak (Browser Leak
debugger), the first system for automatically debugging
memory leaks in web applications. BLeak's algorithms
leverage the observation that in modern web
applications, users often repeatedly return to the same
(approximate) visual state (e.g., the inbox view in
Gmail). Sustained growth between round trips is a
strong indicator of a memory leak. To use BLeak, a
developer writes a short script (17-73 LOC on our
benchmarks) to drive a web application in round trips
to the same visual state. BLeak then automatically
generates a list of leaks found along with their root
causes, ranked by return on investment. Guided by
BLeak, we identify and fix over 50 memory leaks in
popular libraries and apps including Airbnb, AngularJS,
Google Analytics, Google Maps SDK, and jQuery. BLeak's
median precision is 100\%; fixing the leaks it
identifies reduces heap growth by an average of 94\%,
saving from 0.5 MB to 8 MB per round trip. We believe
BLeak's approach to be broadly applicable beyond web
applications, including to GUI applications on desktop
and mobile platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
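
BLeak's core observation, as described above, is that heap state reachable
after repeatedly returning to the same visual state should not keep growing;
whatever grows on every round trip is a leak candidate, ranked by how much it
grows. A language-agnostic sketch of that detector follows; the
per-round-trip snapshot format is our assumption, not BLeak's published
interface.

    def leak_candidates(snapshots):
        """snapshots: one dict per round trip mapping a retaining path
        (string) to how many objects it retains.  Paths whose counts grow
        on every round trip are returned, largest total growth first (a
        crude proxy for the paper's return-on-investment ranking)."""
        common = set(snapshots[0])
        for snap in snapshots[1:]:
            common &= set(snap)
        growing = []
        for path in common:
            counts = [snap[path] for snap in snapshots]
            if all(b > a for a, b in zip(counts, counts[1:])):
                growing.append((counts[-1] - counts[0], path))
        return [path for _, path in sorted(growing, reverse=True)]
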
@Article{Baxter:2018:PAS,
author = "Samuel Baxter and Rachit Nigam and Joe Gibbs Politz
and Shriram Krishnamurthi and Arjun Guha",
title = "Putting in all the stops: execution control for
{JavaScript}",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "30--45",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192370",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scores of compilers produce JavaScript, enabling
programmers to use many languages on the Web, reuse
existing code, and even use Web IDEs. Unfortunately,
most compilers inherit the browser's compromised
execution model, so long-running programs freeze the
browser tab, infinite loops crash IDEs, and so on. The
few compilers that avoid these problems suffer poor
performance and are difficult to engineer. This paper
presents Stopify, a source-to-source compiler that
extends JavaScript with debugging abstractions and
blocking operations, and easily integrates with
existing compilers. We apply Stopify to ten programming
languages and develop a Web IDE that supports stopping,
single-stepping, breakpointing, and long-running
computations. For nine languages, Stopify requires either
no compiler changes or only trivial ones. For eight, our IDE is the
first that provides these features. Two of our subject
languages have compilers with similar features.
Stopify's performance is competitive with these
compilers and it makes them dramatically simpler.
Stopify's abstractions rely on first-class
continuations, which it provides by compiling
JavaScript to JavaScript. We also identify
sub-languages of JavaScript that compilers implicitly
use, and exploit these to improve performance. Finally,
Stopify needs to repeatedly interrupt and resume
program execution. We use a sampling-based technique to
estimate program speed that outperforms other
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
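The execution-control interface the abstract describes (stop, inspect,
resume for long-running code) can be illustrated without continuations. A
toy Python sketch, assuming a hand-inserted yield at the loop back-edge,
which is where such a compiler would place its pause check; it is not
Stopify's continuation-based transformation:

import itertools

def long_running(n):
    # Yield at each loop back-edge -- the point where an execution-control
    # compiler would insert its "should I pause?" check.
    total = 0
    for i in range(n):
        total += i
        yield i, total

gen = long_running(1_000_000)
head = list(itertools.islice(gen, 10))  # run for a while...
print('paused at', head[-1])            # ...stop and inspect state...
for _ in gen:                           # ...and resume: just keep iterating
    pass
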
@Article{Gogte:2018:PSF,
author = "Vaibhav Gogte and Stephan Diestelhorst and William
Wang and Satish Narayanasamy and Peter M. Chen and
Thomas F. Wenisch",
title = "Persistency for synchronization-free regions",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "46--61",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192367",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Nascent persistent memory (PM) technologies promise
the performance of DRAM with the durability of disk,
but how best to integrate them into programming systems
remains an open question. Recent work extends language
memory models with a persistency model prescribing
semantics for updates to PM. These semantics enable
programmers to design data structures in PM that are
accessed like memory and yet are recoverable upon crash
or failure. Alas, we find the semantics and performance
of existing approaches unsatisfying. Existing
approaches require high-overhead mechanisms, are
restricted to certain synchronization constructs,
provide incomplete semantics, and/or may recover to
state that cannot arise in fault-free execution. We
propose persistency semantics that guarantee failure
atomicity of synchronization-free regions (SFRs) ---
program regions delimited by synchronization
operations. Our approach provides clear semantics for
the PM state recovery code may observe and extends
C++11's ``sequential consistency for data-race-free''
guarantee to post-failure recovery code. We investigate
two designs for failure-atomic SFRs that vary in
performance and the degree to which commit of
persistent state may lag execution. We demonstrate both
approaches in LLVM v3.6.0 and compare to a
state-of-the-art baseline to show performance
improvement up to 87.5\% (65.5\% avg).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Akram:2018:WRG,
author = "Shoaib Akram and Jennifer B. Sartor and Kathryn S.
McKinley and Lieven Eeckhout",
title = "Write-rationing garbage collection for hybrid
memories",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "62--77",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192392",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Emerging Non-Volatile Memory (NVM) technologies offer
high capacity and energy efficiency compared to DRAM,
but suffer from limited write endurance and longer
latencies. Prior work seeks the best of both
technologies by combining DRAM and NVM in hybrid
memories to attain low latency, high capacity, energy
efficiency, and durability. Coarse-grained hardware and
OS optimizations then spread writes out (wear-leveling)
and place highly mutated pages in DRAM to extend NVM
lifetimes. Unfortunately even with these coarse-grained
methods, popular Java applications exact impractical
NVM lifetimes of 4 years or less. This paper shows how
to make hybrid memories practical, without changing the
programming model, by enhancing garbage collection in
managed language runtimes. We find object write
behaviors offer two opportunities: (1) 70\% of writes
occur to newly allocated objects, and (2) 2\% of
objects capture 81\% of writes to mature objects. We
introduce write-rationing garbage collectors that
exploit these fine-grained behaviors. They extend NVM
lifetimes by placing highly mutated objects in DRAM and
read-mostly objects in NVM. We implement two such
systems. (1) Kingsguard-nursery places new allocation
in DRAM and survivors in NVM, reducing NVM writes by 5$
\times $ versus an NVM-only system with wear-leveling. (2)
Kingsguard-writers (KG-W) places nursery objects in
DRAM and survivors in a DRAM observer space. It
monitors all mature object writes and moves unwritten
mature objects from DRAM to NVM. Because most mature
objects are unwritten, KG-W exploits NVM capacity while
increasing NVM lifetimes by 11$ \times $. It reduces
the energy-delay product by 32\% over DRAM-only and
29\% over NVM-only. This work opens up new avenues for
making hybrid memories practical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
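The placement policy the abstract sketches is simple to state. A cartoon
Python model of the Kingsguard-writers idea, with made-up object IDs and
write logs; the real system works inside a managed runtime's collector:

def place_survivors(survivors, mature_writes):
    # Survivors sit in a DRAM observer space; objects never written while
    # observed are demoted to NVM, keeping writes off the wear-limited device.
    dram, nvm = set(), set()
    for obj in survivors:
        if mature_writes.get(obj, 0) > 0:
            dram.add(obj)   # highly mutated: keep in DRAM
        else:
            nvm.add(obj)    # read-mostly: exploit NVM capacity
    return dram, nvm

dram, nvm = place_survivors({'a', 'b', 'c'}, {'a': 7})
# 'a' stays in DRAM (it was mutated); 'b' and 'c' move to NVM.
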
@Article{Lin:2018:MSN,
author = "Chit-Kwan Lin and Andreas Wild and Gautham N. Chinya
and Tsung-Han Lin and Mike Davies and Hong Wang",
title = "Mapping spiking neural networks onto a manycore
neuromorphic architecture",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "78--89",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192371",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a compiler for Loihi, a novel manycore
neuromorphic processor that features a programmable,
on-chip learning engine for training and executing
spiking neural networks (SNNs). An SNN is distinguished
from other neural networks in that (1) its independent
computing units, or ``neurons'', communicate with
others only through spike messages; and (2) each neuron
evaluates local learning rules, which are functions of
spike arrival and departure timings, to modify its
local state. The collective neuronal state dynamics of
an SNN form a nonlinear dynamical system that can be
cast as an unconventional model of computation. To
realize such an SNN on Loihi requires each constituent
neuron to locally store and independently update its
own spike timing information. However, each Loihi core
has limited resources for this purpose and these must
be shared by neurons assigned to the same core. In this
work, we present a compiler for Loihi that maps the
neurons of an SNN onto and across Loihi's cores
efficiently. We show that a poor neuron-to-core mapping
can incur significant energy costs and address this
with a greedy algorithm that compiles SNNs onto Loihi
in a power-efficient manner. In so doing, we highlight
the need for further development of compilers for this
new, emerging class of architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
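The greedy neuron-to-core compilation step can be approximated by a standard
greedy graph-placement pass. A hedged Python sketch that uses cross-core
edges as a stand-in for the paper's energy cost model; the capacity limit
and the graph here are invented, and it assumes enough total capacity:

def greedy_map(neurons, edges, num_cores, capacity):
    # Assumes num_cores * capacity >= len(neurons).
    neighbors = {n: set() for n in neurons}
    for a, b in edges:
        neighbors[a].add(b)
        neighbors[b].add(a)
    core_of, load = {}, [0] * num_cores
    for n in neurons:
        # Prefer the non-full core already holding most of n's neighbors,
        # i.e. greedily minimize spikes that must cross cores.
        best = max((c for c in range(num_cores) if load[c] < capacity),
                   key=lambda c: sum(core_of.get(m) == c for m in neighbors[n]))
        core_of[n] = best
        load[best] += 1
    return core_of

print(greedy_map(['n1', 'n2', 'n3', 'n4'],
                 [('n1', 'n2'), ('n3', 'n4')], num_cores=2, capacity=2))
# {'n1': 0, 'n2': 0, 'n3': 1, 'n4': 1}: connected neurons share a core.
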
@Article{Brutschy:2018:SSA,
author = "Lucas Brutschy and Dimitar Dimitrov and Peter
M{\"u}ller and Martin Vechev",
title = "Static serializability analysis for causal
consistency",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "90--104",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192415",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many distributed databases provide only weak
consistency guarantees to reduce synchronization
overhead and remain available under network partitions.
However, this leads to behaviors not possible under
stronger guarantees. Such behaviors can easily defy
programmer intuition and lead to errors that are
notoriously hard to detect. In this paper, we propose a
static analysis for detecting non-serializable
behaviors of applications running on top of
causally-consistent databases. Our technique is based
on a novel, local serializability criterion and
combines a generalization of graph-based techniques
from the database literature with another,
complementary analysis technique that encodes our
serializability criterion into first-order logic
formulas to be checked by an SMT solver. This analysis
is more expensive yet more precise and produces
concrete counter-examples. We implemented our methods
and evaluated them on a number of applications from two
different domains: cloud-backed mobile applications and
clients of a distributed database. Our experiments
demonstrate that our analysis is able to detect harmful
serializability violations while producing only a small
number of false alarms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
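The graph-based technique from the database literature that the paper
generalizes is conflict-graph acyclicity: a history is conflict-serializable
iff the graph over conflicting transaction pairs has no cycle. A minimal
Python sketch, with conflicts supplied as directed transaction pairs:

def serializable(transactions, conflicts):
    graph = {t: set() for t in transactions}
    for earlier, later in conflicts:  # one directed edge per conflicting pair
        graph[earlier].add(later)
    WHITE, GRAY, BLACK = 0, 1, 2
    color = {t: WHITE for t in transactions}
    def has_cycle(t):
        color[t] = GRAY
        for u in graph[t]:
            if color[u] == GRAY or (color[u] == WHITE and has_cycle(u)):
                return True
        color[t] = BLACK
        return False
    return not any(color[t] == WHITE and has_cycle(t) for t in transactions)

# serializable(['t1', 't2'], [('t1', 't2'), ('t2', 't1')]) -> False (a cycle)
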
@Article{Liu:2018:CIC,
author = "Peizun Liu and Thomas Wahl",
title = "{CUBA}: interprocedural {Context-UnBounded Analysis}
of concurrent programs",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "105--119",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192419",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A classical result by Ramalingam about
synchronization-sensitive interprocedural program
analysis implies that reachability for concurrent
threads running recursive procedures is undecidable. A
technique proposed by Qadeer and Rehof, to bound the
number of context switches allowed between the threads,
leads to an incomplete solution that is, however,
believed to catch ``most bugs'' in practice. The
question whether the technique can also prove the
absence of bugs at least in some cases has remained
largely open. In this paper we introduce a broad
verification methodology for resource-parameterized
programs that observes how changes to the resource
parameter affect the behavior of the program. Applied
to the context-unbounded analysis problem (CUBA), the
methodology results in partial verification techniques
for procedural concurrent programs. Our solutions may
not terminate, but are able to both refute and prove
context-unbounded safety for concurrent recursive
threads. We demonstrate the effectiveness of our method
using a variety of examples, the safety of which cannot
be proved by earlier, context-bounded methods.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Ferles:2018:SRA,
author = "Kostas Ferles and Jacob {Van Geffen} and Isil Dillig
and Yannis Smaragdakis",
title = "Symbolic reasoning for automatic signal placement",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "120--134",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192395",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Explicit signaling between threads is a perennial
cause of bugs in concurrent programs. While there are
several run-time techniques to automatically notify
threads upon the availability of some shared resource,
such techniques are not widely adopted due to their
run-time overhead. This paper proposes a new solution
based on static analysis for automatically generating a
performant explicit-signal program from its
corresponding implicit-signal implementation. The key
idea is to generate verification conditions that allow
us to minimize the number of required signals and
unnecessary context switches, while guaranteeing
semantic equivalence between the source and target
programs. We have implemented our method in a tool
called Expresso and evaluate it on challenging
benchmarks from prior papers and open-source software.
Expresso-generated code significantly outperforms past
automatic signaling mechanisms (avg. 1.56x speedup) and
closely matches the performance of hand-optimized
explicit-signal code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
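The implicit-versus-explicit-signal distinction is easiest to see on a
bounded buffer. A hand-written Python sketch of the explicit-signal style
such a tool targets, signaling only when a waiter's predicate may have just
become true; this edge-triggered variant is only correct under the stated
single-producer, single-consumer assumption:

import threading
from collections import deque

class BoundedBuffer:
    # Assumes exactly one producer and one consumer; with more threads,
    # notify on every put/get (or use notify_all) to avoid lost wakeups.
    def __init__(self, cap):
        self.cap, self.q = cap, deque()
        self.lock = threading.Lock()
        self.not_empty = threading.Condition(self.lock)
        self.not_full = threading.Condition(self.lock)

    def put(self, x):
        with self.lock:
            while len(self.q) == self.cap:
                self.not_full.wait()
            self.q.append(x)
            if len(self.q) == 1:         # "non-empty" just became true
                self.not_empty.notify()  # minimal, targeted signal

    def get(self):
        with self.lock:
            while not self.q:
                self.not_empty.wait()
            x = self.q.popleft()
            if len(self.q) == self.cap - 1:  # "non-full" just became true
                self.not_full.notify()
            return x
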
@Article{Chen:2018:AAB,
author = "Yu-Fang Chen and Matthias Heizmann and Ondrej
Leng{\'a}l and Yong Li and Ming-Hsien Tsai and Andrea
Turrini and Lijun Zhang",
title = "Advanced automata-based algorithms for program
termination checking",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "135--150",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192405",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In 2014, Heizmann et al. proposed a novel framework
for program termination analysis. The analysis starts
with a termination proof of a sample path. The path is
generalized to a B{\"u}chi automaton (BA) whose
language (by construction) represents a set of
terminating paths. All these paths can be safely
removed from the program. The removal of paths is done
using automata difference, implemented via BA
complementation and intersection. The analysis
constructs in this way a set of BAs that jointly
``cover'' the behavior of the program, thus proving its
termination. An implementation of the approach in
Ultimate Automizer won first place in the Termination
category of SV-COMP 2017. In this paper, we exploit
advanced automata-based algorithms and propose several
non-trivial improvements of the framework. To alleviate
the complementation computation for BAs---one of the
most expensive operations in the framework---we
propose a multi-stage generalization construction. We
start with generalizations producing subclasses of BAs
(such as deterministic BAs) for which efficient
complementation algorithms are known, and proceed to
more general classes only if necessary. Particularly,
we focus on the quite expressive subclass of
semideterministic BAs and provide an improved
complementation algorithm for this class. Our
experimental evaluation shows that the proposed
approach significantly improves the power of
termination checking within the Ultimate Automizer
framework.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Ottoni:2018:HJP,
author = "Guilherme Ottoni",
title = "{HHVM JIT}: a profile-guided, region-based compiler
for {PHP} and Hack",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "151--165",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192374",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic languages such as PHP, JavaScript, Python, and
Ruby have been gaining popularity over the last two
decades. A very popular domain for these languages is
web development, including server-side development of
large-scale websites. As a result, improving the
performance of these languages has become more
important. Efficiently compiling programs in these
languages is challenging, and many popular dynamic
languages still lack efficient production-quality
implementations. This paper describes the design of the
second generation of the HHVM JIT and how it addresses
the challenges of efficiently executing PHP and Hack
programs. This new design uses profiling to build an
aggressive region-based JIT compiler. We discuss the
benefits of this approach compared to the more popular
method-based and trace-based approaches to compile
dynamic languages. Our evaluation running a very large
PHP-based code base, the Facebook website, demonstrates
the effectiveness of the new JIT design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{DElia:2018:SRD,
author = "Daniele Cono D'Elia and Camil Demetrescu",
title = "On-stack replacement, distilled",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "166--180",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192396",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "On-stack replacement (OSR) is essential technology for
adaptive optimization, allowing changes to code
actively executing in a managed runtime. The
engineering aspects of OSR are well-known among VM
architects, with several implementations available to
date. However, OSR is yet to be explored as a general
means to transfer execution between related program
versions, which can pave the road to unprecedented
applications that stretch beyond VMs. We aim at filling
this gap with a constructive and provably correct OSR
framework, allowing a class of general-purpose
transformation functions to yield a special-purpose
replacement. We describe and evaluate an implementation
of our technique in LLVM. As a novel application of
OSR, we present a feasibility study on debugging of
optimized code, showing how our techniques can be used
to fix variables holding incorrect values at
breakpoints due to optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Duck:2018:ETM,
author = "Gregory J. Duck and Roland H. C. Yap",
title = "{EffectiveSan}: type and memory error detection using
dynamically typed {C\slash C++}",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "181--195",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192388",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Low-level programming languages with weak/static type
systems, such as C and C++, are vulnerable to errors
relating to the misuse of memory at runtime, such as
(sub-)object bounds overflows, (re)use-after-free, and
type confusion. Such errors account for many security
and other undefined behavior bugs for programs written
in these languages. In this paper, we introduce the
notion of dynamically typed C/C++, which aims to detect
such errors by dynamically checking the ``effective
type'' of each object before use at runtime. We also
present an implementation of dynamically typed C/C++ in
the form of the Effective Type Sanitizer
(EffectiveSan). EffectiveSan enforces type and memory
safety using a combination of low-fat pointers, type
metadata and type/bounds check instrumentation. We
evaluate EffectiveSan against the SPEC2006 benchmark
suite and the Firefox web browser, and detect several
new type and memory errors. We also show that
EffectiveSan achieves high compatibility and reasonable
overheads for the given error coverage. Finally, we
highlight that EffectiveSan is one of only a few tools
that can detect sub-object bounds errors, and uses a
novel approach (dynamic type checking) to do so.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Cai:2018:CRC,
author = "Cheng Cai and Qirun Zhang and Zhiqiang Zuo and Khanh
Nguyen and Guoqing Xu and Zhendong Su",
title = "Calling-to-reference context translation via
constraint-guided {CFL}-reachability",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "196--210",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192378",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A calling context is an important piece of information
used widely to help developers understand program
executions (e.g., for debugging). While calling
contexts offer useful control information, information
regarding data involved in a bug (e.g., what data
structure holds a leaking object), in many cases, can
bring developers closer to the bug's root cause. Such
data information, often exhibited as heap reference
paths, has already been needed by many tools. The only
way for a dynamic analysis to record complete reference
paths is to perform heap dumping, which incurs huge
runtime overhead and renders the analysis impractical.
This paper presents a novel static analysis that can
precisely infer, from a calling context of a method
that contains a use (e.g., read or write) of an object,
the heap reference paths leading to the object at the
time the use occurs. Since calling context recording is
much less expensive, our technique provides benefits
for all dynamic techniques that need heap information,
significantly reducing their overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Chong:2018:STW,
author = "Nathan Chong and Tyler Sorensen and John Wickerson",
title = "The semantics of transactions and weak memory in x86,
{Power}, {ARM}, and {C++}",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "211--225",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192373",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Weak memory models provide a complex, system-centric
semantics for concurrent programs, while transactional
memory (TM) provides a simpler, programmer-centric
semantics. Both have been studied in detail, but their
combined semantics is not well understood. This is
problematic because such widely-used architectures and
languages as x86, Power, and C++ all support TM, and
all have weak memory models. Our work aims to clarify
the interplay between weak memory and TM by extending
existing axiomatic weak memory models (x86, Power,
ARMv8, and C++) with new rules for TM. Our formal
models are backed by automated tooling that enables (1)
the synthesis of tests for validating our models
against existing implementations and (2) the
model-checking of TM-related transformations, such as
lock elision and compiling C++ transactions to
hardware. A key finding is that a proposed TM extension
to ARMv8 currently being considered within ARM Research
is incompatible with lock elision without sacrificing
portability or performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Milano:2018:MLM,
author = "Matthew Milano and Andrew C. Myers",
title = "{MixT}: a language for mixing consistency in
geodistributed transactions",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "226--241",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192375",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Programming concurrent, distributed systems is
hard, especially when these systems mutate shared,
persistent state replicated at geographic scale. To
enable high availability and scalability, a new class
of weakly consistent data stores has become popular.
However, some data needs strong consistency. To
manipulate both weakly and strongly consistent data in
a single transaction, we introduce a new abstraction:
mixed-consistency transactions, embodied in a new
embedded language, MixT. Programmers explicitly
associate consistency models with remote storage sites;
each atomic, isolated transaction can access a mixture
of data with different consistency models. Compile-time
information-flow checking, applied to consistency
models, ensures that these models are mixed safely and
enables the compiler to automatically partition
transactions. New run-time mechanisms ensure that
consistency models can also be mixed safely, even when
the data used by a transaction resides on separate,
mutually unaware stores. Performance measurements show
that despite their stronger guarantees,
mixed-consistency transactions retain much of the speed
of weak consistency, significantly outperforming
traditional serializable transactions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Dolan:2018:BDR,
author = "Stephen Dolan and KC Sivaramakrishnan and Anil
Madhavapeddy",
title = "Bounding data races in space and time",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "242--255",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192421",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a new semantics for shared-memory parallel
programs that gives strong guarantees even in the
presence of data races. Our local data race freedom
property guarantees that all data-race-free portions of
programs exhibit sequential semantics. We provide a
straightforward operational semantics and an equivalent
axiomatic model, and evaluate an implementation for the
OCaml programming language. Our evaluation demonstrates
that it is possible to balance a comprehensible memory
model with a reasonable (no overhead on x86, ~0.6\% on
ARM) sequential performance trade-off in a mainstream
programming language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Sanchez-Stern:2018:FRC,
author = "Alex Sanchez-Stern and Pavel Panchekha and Sorin
Lerner and Zachary Tatlock",
title = "Finding root causes of floating point error",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "256--269",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192411",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Floating-point arithmetic plays a central role in
science, engineering, and finance by enabling
developers to approximate real arithmetic. To address
numerical issues in large floating-point applications,
developers must identify root causes, which is
difficult because floating-point errors are generally
non-local, non-compositional, and non-uniform. This
paper presents Herbgrind, a tool to help developers
identify and address root causes in numerical code
written in low-level languages like C/C++ and Fortran.
Herbgrind dynamically tracks dependencies between
operations and program outputs to avoid false positives
and abstracts erroneous computations to simplified
program fragments whose improvement can reduce output
error. We perform several case studies applying
Herbgrind to large, expert-crafted numerical programs
and show that it scales to applications spanning
hundreds of thousands of lines, correctly handling the
low-level details of modern floating point hardware and
mathematical libraries and tracking error across
function boundaries and through the heap.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
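The shadow-execution style of dynamic error tracking can be demonstrated in
a few lines. A minimal Python experiment, not Herbgrind itself, that runs
one computation in double precision and in an exact shadow and reports the
divergence:

from fractions import Fraction

def cancellation_demo(x):
    f = (1.0 + x) - 1.0                  # double-precision computation
    s = (Fraction(1) + Fraction(x)) - 1  # exact shadow of the same steps
    rel_err = abs(Fraction(f) - s) / abs(s) if s else abs(Fraction(f))
    return f, float(rel_err)

# For tiny x, (1 + x) - 1 cancels away every significant bit of x:
print(cancellation_demo(1e-16))  # -> (0.0, 1.0): the result is 100% wrong
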
@Article{Adams:2018:RFF,
author = "Ulf Adams",
title = "{Ry{\=u}}: fast float-to-string conversion",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "270--282",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192369",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present Ry{\=u}, a new routine to convert binary
floating point numbers to their decimal representations
using only fixed-size integer operations, and prove its
correctness. Ry{\=u} is simpler and approximately three
times faster than the previously fastest
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
keywords = "base conversion; input-output conversion; radix
conversion; round-trip base conversion",
remark = "PLDI '18 proceedings.",
}
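What such a routine computes (though not how Ry{\=u} computes it) is the
shortest decimal string that parses back to the same binary64 value. A
brute-force Python oracle for finite doubles, handy for testing fast
converters:

def shortest_roundtrip(f: float) -> str:
    # 17 significant digits always suffice for binary64, so try 1..17.
    for digits in range(1, 18):
        s = f'{f:.{digits}g}'
        if float(s) == f:  # round-trips exactly: this precision is enough
            return s
    return repr(f)

assert shortest_roundtrip(0.1) == '0.1'
assert float(shortest_roundtrip(2 / 3)) == 2 / 3
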
@Article{Steindorfer:2018:MOA,
author = "Michael J. Steindorfer and Jurgen J. Vinju",
title = "To-many or to-one? {All}-in-one! {Efficient} purely
functional multi-maps with type-heterogeneous
hash-tries",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "283--295",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192420",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "An immutable multi-map is a many-to-many map data
structure with expected fast insert and lookup
operations. This data structure is used for
applications processing graphs or many-to-many
relations as applied in compilers, runtimes of
programming languages, or in static analysis of
object-oriented systems. Collection data structures are
assumed to carefully balance execution time of
operations with memory consumption characteristics and
need to scale gracefully from a few elements to
multiple gigabytes at least. When processing larger
in-memory data sets the overhead of the data structure
encoding itself becomes a memory usage bottleneck,
dominating the overall performance. In this paper we
propose AXIOM, a novel hash-trie data structure that
allows for a highly efficient and type-safe multi-map
encoding by distinguishing inlined values of singleton
sets from nested sets of multi-mappings. AXIOM strictly
generalizes over previous hash-trie data structures by
supporting the processing of fine-grained
type-heterogeneous content on the implementation level
(while API and language support for type-heterogeneity
are beyond the scope of this paper). We detail the design and
optimizations of AXIOM and further compare it against
state-of-the-art immutable maps and multi-maps in Java,
Scala and Clojure. We isolate key differences using
microbenchmarks and validate the resulting conclusions
on a case study in static analysis. AXIOM reduces the
key-value storage overhead by 1.87x; with specializing
and inlining across collection boundaries it improves
by 5.1x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
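The heterogeneous encoding the abstract describes -- inline a singleton
value and allocate a nested set only on the second insert -- is easy to
state apart from the hash-trie. A Python sketch of just that encoding,
assuming values are never themselves set instances:

class InlineMultiMap:
    def __init__(self):
        self._slots = {}  # key -> inlined value, or set of values

    def insert(self, k, v):
        cur = self._slots.get(k)
        if cur is None:
            self._slots[k] = v         # singleton: no set object allocated
        elif isinstance(cur, set):
            cur.add(v)
        elif cur != v:
            self._slots[k] = {cur, v}  # upgrade to a nested set

    def get(self, k):
        cur = self._slots.get(k)
        if cur is None:
            return set()
        return cur if isinstance(cur, set) else {cur}

This saves one set object per singleton key, which is where the memory
win for mostly-one-to-one data comes from.
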
@Article{Koeplinger:2018:SLC,
author = "David Koeplinger and Matthew Feldman and Raghu
Prabhakar and Yaqi Zhang and Stefan Hadjis and Ruben
Fiszel and Tian Zhao and Luigi Nardi and Ardavan Pedram
and Christos Kozyrakis and Kunle Olukotun",
title = "{Spatial}: a language and compiler for application
accelerators",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "296--311",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192379",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Industry is increasingly turning to reconfigurable
architectures like FPGAs and CGRAs for improved
performance and energy efficiency. Unfortunately,
adoption of these architectures has been limited by
their programming models. HDLs lack abstractions for
productivity and are difficult to target from higher
level languages. HLS tools are more productive, but
offer an ad-hoc mix of software and hardware
abstractions which make performance optimizations
difficult. In this work, we describe a new
domain-specific language and compiler called Spatial
for higher level descriptions of application
accelerators. We describe Spatial's hardware-centric
abstractions for both programmer productivity and
design performance, and summarize the compiler passes
required to support these abstractions, including
pipeline scheduling, automatic memory banking, and
automated design tuning driven by active machine
learning. We demonstrate the language's ability to
target FPGAs and CGRAs from common source code. We show
that applications written in Spatial are, on average,
42\% shorter and achieve a mean speedup of 2.9x over
SDAccel HLS when targeting a Xilinx UltraScale+ VU9P
FPGA on an Amazon EC2 F1 instance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Kislal:2018:ECC,
author = "Orhan Kislal and Jagadish Kotra and Xulong Tang and
Mahmut Taylan Kandemir and Myoungsoo Jung",
title = "Enhancing computation-to-core assignment with physical
location information",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "312--327",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192386",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Going beyond a certain number of cores in modern
architectures requires an on-chip network more scalable
than conventional buses. However, employing an on-chip
network in a manycore system (to improve scalability)
makes the latencies of the data accesses issued by a
core non-uniform. This non-uniformity can play a
significant role in shaping the overall application
performance. This work presents a novel compiler
strategy which involves exposing architecture
information to the compiler to enable an optimized
computation-to-core mapping. Specifically, we propose a
compiler-guided scheme that takes into account the
relative positions of (and distances between) cores,
last-level caches (LLCs) and memory controllers (MCs)
in a manycore system, and generates a mapping of
computations to cores with the goal of minimizing the
on-chip network traffic. The experimental data
collected using a set of 21 multi-threaded applications
reveal that, on an average, our approach reduces the
on-chip network latency in a 6$ \times $6 manycore
system by 38.4\% in the case of private LLCs, and
43.8\% in the case of shared LLCs. These improvements
translate to the corresponding execution time
improvements of 10.9\% and 12.7\% for the private LLC
and shared LLC based systems, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Tran:2018:SSH,
author = "Kim-Anh Tran and Alexandra Jimborean and Trevor E.
Carlson and Konstantinos Koukos and Magnus
Sj{\"a}lander and Stefanos Kaxiras",
title = "{SWOOP}: software-hardware co-design for
non-speculative, execute-ahead, in-order cores",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "328--343",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192393",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Increasing demands for energy efficiency constrain
emerging hardware. These new hardware trends challenge
the established assumptions in code generation and
force us to rethink existing software optimization
techniques. We propose a cross-layer redesign of the
way compilers and the underlying microarchitecture are
built and interact, to achieve both performance and
high energy efficiency. In this paper, we address one
of the main performance bottlenecks --- last-level
cache misses --- through a software-hardware co-design.
Our approach is able to hide memory latency and attain
increased memory and instruction level parallelism by
orchestrating a non-speculative, execute-ahead paradigm
in software (SWOOP). While out-of-order (OoO)
architectures attempt to hide memory latency by
dynamically reordering instructions, they do so through
expensive, power-hungry, speculative mechanisms. We aim
to shift this complexity into software, and we build
upon compilation techniques inherited from VLIW,
software pipelining, modulo scheduling, decoupled
access-execution, and software prefetching. In contrast
to previous approaches we do not rely on either
software or hardware speculation that can be
detrimental to efficiency. Our SWOOP compiler is
enhanced with lightweight architectural support, thus
being able to transform applications that include
highly complex control-flow and indirect memory
accesses.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Liu:2018:ISI,
author = "Hongyu Liu and Sam Silvestro and Wei Wang and Chen
Tian and Tongping Liu",
title = "{iReplayer}: in-situ and identical record-and-replay
for multithreaded applications",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "344--358",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192380",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Reproducing executions of multithreaded programs is
very challenging due to many intrinsic and external
non-deterministic factors. Existing RnR systems achieve
significant progress in terms of performance overhead,
but none targets the in-situ setting, in which replay
occurs within the same process as the recording
process. Also, most existing work cannot achieve
identical replay, which may prevent the reproduction of
some errors. This paper presents iReplayer, which aims
to identically replay multithreaded programs in the
original process (under the ``in-situ'' setting). The
novel in-situ and identical replay of iReplayer makes
it more likely to reproduce errors, and allows it to
directly employ debugging mechanisms (e.g. watchpoints)
to aid failure diagnosis. Currently, iReplayer incurs
only 3\% performance overhead on average, which
allows it to be always enabled in the production
environment. iReplayer enables a range of
possibilities, and this paper presents three examples:
two automatic tools for detecting buffer overflows and
use-after-free bugs, and one interactive debugging tool
that is integrated with GDB.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Liu:2018:DFC,
author = "Bozhen Liu and Jeff Huang",
title = "{D4}: fast concurrency debugging with parallel
differential analysis",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "359--373",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192390",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present D4, a fast concurrency analysis framework
that detects concurrency bugs (e.g., data races and
deadlocks) interactively in the programming phase. As
developers add, modify, and remove statements, the code
changes are sent to D4 to detect concurrency bugs in
real time, which in turn provides immediate feedback to
the developer of the new bugs. The cornerstone of D4
includes a novel system design and two novel parallel
differential algorithms that embrace both change and
parallelization for fundamental static analyses of
concurrent programs. Both algorithms react to program
changes by memoizing the analysis results and only
recomputing the impact of a change in parallel. Our
evaluation on an extensive collection of large
real-world applications shows that D4 efficiently
pinpoints concurrency bugs within 100ms on average
after a code change, several orders of magnitude faster
than both the exhaustive analysis and the
state-of-the-art incremental techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Roemer:2018:HCU,
author = "Jake Roemer and Kaan Gen{\c{c}} and Michael D. Bond",
title = "High-coverage, unbounded sound predictive race
detection",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "374--389",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192385",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic program analysis can predict data races
knowable from an observed execution, but existing
predictive analyses either miss races or cannot analyze
full program executions. This paper presents
Vindicator, a novel, sound (no false races) predictive
approach that finds more data races than existing
predictive approaches. Vindicator achieves high
coverage by using a new, efficient analysis that finds
all possible predictable races but may detect false
races. Vindicator ensures soundness using a novel
algorithm that checks each potential race to determine
whether it is a true predictable race. An evaluation
using large Java programs shows that Vindicator finds
hard-to-detect predictable races that existing sound
predictive analyses miss, at a comparable performance
cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Peng:2018:CDC,
author = "Yuanfeng Peng and Vinod Grover and Joseph Devietti",
title = "{CURD}: a dynamic {CUDA} race detector",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "390--403",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192368",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "As GPUs have become an integral part of nearly every
processor, GPU programming has become increasingly
popular. GPU programming requires a combination of
extreme levels of parallelism and low-level
programming, making it easy for concurrency bugs such
as data races to arise. These concurrency bugs can be
extremely subtle and difficult to debug due to the
massive numbers of threads running concurrently on a
modern GPU. While some tools exist to detect data races
in GPU programs, they are often prohibitively slow or
focused only on a small class of data races in shared
memory. Compared to prior work, our race detector,
CURD, can detect data races precisely on both shared
and global memory, selects an appropriate race
detection algorithm based on the synchronization used
in a program, and utilizes efficient compiler
instrumentation to reduce performance overheads. Across
53 benchmarks, we find that using CURD incurs an
average slowdown of just 2.88x over native execution. CURD
is 2.1x faster than Nvidia's CUDA-Racecheck race
detector, despite detecting a much broader class of
races. CURD finds 35 races across our benchmarks,
including bugs in established benchmark suites and in
sample programs from Nvidia.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Alon:2018:GPB,
author = "Uri Alon and Meital Zilberstein and Omer Levy and Eran
Yahav",
title = "A general path-based representation for predicting
program properties",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "404--419",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192412",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/csharp.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Predicting program properties such as names or
expression types has a wide range of applications. It
can ease the task of programming, and increase
programmer productivity. A major challenge when
learning from programs is how to represent programs in
a way that facilitates effective learning. We present a
general path-based representation for learning from
programs. Our representation is purely syntactic and
extracted automatically. The main idea is to represent
a program using paths in its abstract syntax tree
(AST). This allows a learning model to leverage the
structured nature of code rather than treating it as a
flat sequence of tokens. We show that this
representation is general and can: (i) cover different
prediction tasks, (ii) drive different learning
algorithms (for both generative and discriminative
models), and (iii) work across different programming
languages. We evaluate our approach on the tasks of
predicting variable names, method names, and full
types. We use our representation to drive both
CRF-based and word2vec-based learning, for programs of
four languages: JavaScript, Java, Python and C\#. Our
evaluation shows that our approach obtains better
results than task-specific handcrafted representations
across different tasks and programming languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
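The path-based representation can be prototyped with Python's own ast
module. A simplified sketch that pairs identifier leaves and records the
node types along the path through their lowest common ancestor; the paper's
representation is richer than this:

import ast
from itertools import combinations

def leaf_paths(source):
    tree = ast.parse(source)
    parents, leaves = {}, []
    for node in ast.walk(tree):
        for child in ast.iter_child_nodes(node):
            parents[child] = node
        if isinstance(node, ast.Name):  # identifier leaves only, for brevity
            leaves.append(node)
    def ancestors(n):
        chain = [n]
        while n in parents:
            n = parents[n]
            chain.append(n)
        return chain
    paths = []
    for a, b in combinations(leaves, 2):
        up_a, up_b = ancestors(a), ancestors(b)
        lca = next(n for n in up_a if n in set(up_b))
        half_a = up_a[:up_a.index(lca) + 1]    # a up to the common ancestor
        half_b = up_b[:up_b.index(lca)][::-1]  # back down toward b
        paths.append((a.id, [type(n).__name__ for n in half_a + half_b], b.id))
    return paths

print(leaf_paths('x = y + 1'))
# [('x', ['Name', 'Assign', 'BinOp', 'Name'], 'y')]
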
@Article{Feng:2018:PSU,
author = "Yu Feng and Ruben Martins and Osbert Bastani and Isil
Dillig",
title = "Program synthesis using conflict-driven learning",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "420--435",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192382",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a new conflict-driven program synthesis
technique that is capable of learning from past
mistakes. Given a spurious program that violates the
desired specification, our synthesis algorithm
identifies the root cause of the conflict and learns
new lemmas that can prevent similar mistakes in the
future. Specifically, we introduce the notion of
equivalence modulo conflict and show how this idea can
be used to learn useful lemmas that allow the
synthesizer to prune large parts of the search space.
We have implemented a general-purpose CDCL-style
program synthesizer called Neo and evaluate it in two
different application domains, namely data wrangling in
R and functional programming over lists. Our
experiments demonstrate the substantial benefits of
conflict-driven learning and show that Neo outperforms
two state-of-the-art synthesis tools, Morpheus and
Deepcoder, that target these respective domains.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Lee:2018:ASB,
author = "Woosuk Lee and Kihong Heo and Rajeev Alur and Mayur
Naik",
title = "Accelerating search-based program synthesis using
learned probabilistic models",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "436--449",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192410",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A key challenge in program synthesis concerns how to
efficiently search for the desired program in the space
of possible programs. We propose a general approach to
accelerate search-based program synthesis by biasing
the search towards likely programs. Our approach
targets a standard formulation, syntax-guided synthesis
(SyGuS), by extending the grammar of possible programs
with a probabilistic model dictating the likelihood of
each program. We develop a weighted search algorithm to
efficiently enumerate programs in order of their
likelihood. We also propose a method based on transfer
learning that enables us to effectively learn a powerful
model, called probabilistic higher-order grammar, from
known solutions in a domain. We have implemented our
approach in a tool called Euphony and evaluate it on
SyGuS benchmark problems from a variety of domains. We
show that Euphony can learn good models using easily
obtainable solutions, and achieves significant
performance gains over existing general-purpose as well
as domain-specific synthesizers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
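Likelihood-ordered enumeration reduces to best-first search over partial
programs. A toy Python sketch with an invented weighted grammar; the paper
instead learns the weights as a probabilistic higher-order grammar:

import heapq

RULES = {  # nonterminal -> [(probability, replacement tokens)]
    'E': [(0.5, ['x']), (0.3, ['1']), (0.2, ['(', 'E', '+', 'E', ')'])],
}

def enumerate_by_likelihood(limit=5):
    # Expand the most probable partial program first, so complete programs
    # pop out in descending order of probability.
    heap, tie, out = [(-1.0, 0, ['E'])], 1, []
    while heap and len(out) < limit:
        negp, _, form = heapq.heappop(heap)
        nt = next((i for i, t in enumerate(form) if t in RULES), None)
        if nt is None:
            out.append((-negp, ' '.join(form)))  # no nonterminals: complete
            continue
        for p, rhs in RULES[form[nt]]:
            heapq.heappush(heap, (negp * p, tie, form[:nt] + rhs + form[nt + 1:]))
            tie += 1
    return out

print(enumerate_by_likelihood())
# [(0.5, 'x'), (0.3, '1'), (0.05, '( x + x )'), ...]
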
@Article{Paletov:2018:ICA,
author = "Rumen Paletov and Petar Tsankov and Veselin Raychev
and Martin Vechev",
title = "Inferring crypto {API} rules from code changes",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "450--464",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192403",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Creating and maintaining an up-to-date set of security
rules that match misuses of crypto APIs is challenging,
as crypto APIs constantly evolve over time with new
cryptographic primitives and settings, making existing
ones obsolete. To address this challenge, we present a
new approach to extract security fixes from thousands
of code changes. Our approach consists of: (i)
identifying code changes, which often capture security
fixes, (ii) an abstraction that filters irrelevant code
changes (such as refactorings), and (iii) a clustering
analysis that reveals commonalities between semantic
code changes and helps in eliciting security rules. We
applied our approach to the Java Crypto API and showed
that it is effective: (i) our abstraction effectively
filters non-semantic code changes (over 99\% of all
changes) without removing security fixes, and (ii) over
80\% of the code changes are security fixes identifying
security rules. Based on our results, we identified 13
rules, including new ones not supported by existing
security checkers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Gulwani:2018:ACP,
author = "Sumit Gulwani and Ivan Radicek and Florian Zuleger",
title = "Automated clustering and program repair for
introductory programming assignments",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "465--480",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192387",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Providing feedback on programming assignments is a
tedious task for the instructor, and even impossible in
large Massive Open Online Courses with thousands of
students. Previous research has suggested that program
repair techniques can be used to generate feedback in
programming education. In this paper, we present a
novel fully automated program repair algorithm for
introductory programming assignments. The key idea of
the technique, which enables automation and
scalability, is to use the existing correct student
solutions to repair the incorrect attempts. We evaluate
the approach in two experiments: (I) We evaluate the
number, size and quality of the generated repairs on
4,293 incorrect student attempts from an existing MOOC.
We find that our approach can repair 97\% of student
attempts, while 81\% of those are small repairs of good
quality. (II) We conduct a preliminary user study on
performance and repair usefulness in an interactive
teaching setting. We obtain promising initial results
(an average usefulness grade of 3.4 on a scale from 1 to
5), and conclude that our approach can be used in an
interactive setting.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Wang:2018:SAR,
author = "Ke Wang and Rishabh Singh and Zhendong Su",
title = "Search, align, and repair: data-driven feedback
generation for introductory programming exercises",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "481--495",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192384",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces the ``Search, Align, and
Repair'' data-driven program repair framework to
automate feedback generation for introductory
programming exercises. Distinct from existing
techniques, our goal is to develop an efficient, fully
automated, and problem-agnostic technique for large or
MOOC-scale introductory programming courses. We
leverage the large amount of available student
submissions in such settings and develop new algorithms
for identifying similar programs, aligning correct and
incorrect programs, and repairing incorrect programs by
finding minimal fixes. We have implemented our
technique in the Sarfgen system and evaluated it on
thousands of real student attempts from the
Microsoft-DEV204.1x edX course and the Microsoft
CodeHunt platform. Our results show that Sarfgen can,
within two seconds on average, generate concise, useful
feedback for 89.7\% of the incorrect student
submissions. It has been integrated with the
Microsoft-DEV204.1X edX class and deployed for
production use.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Ngo:2018:BER,
author = "Van Chan Ngo and Quentin Carbonneaux and Jan
Hoffmann",
title = "Bounded expectations: resource analysis for
probabilistic programs",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "496--512",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192394",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents a new static analysis for deriving
upper bounds on the expected resource consumption of
probabilistic programs. The analysis is fully automatic
and derives symbolic bounds that are multivariate
polynomials in the inputs. The new technique combines
manual state-of-the-art reasoning techniques for
probabilistic programs with an effective method for
automatic resource-bound analysis of deterministic
programs. It can be seen as both an extension of
automatic amortized resource analysis (AARA) to
probabilistic programs and an automation of manual
reasoning for probabilistic programs that is based on
weakest preconditions. An advantage of the technique is
that it combines the clarity and compositionality of a
weakest-precondition calculus with the efficient
automation of AARA. As a result, bound inference can be
reduced to off-the-shelf LP solving in many cases and
automatically-derived bounds can be interactively
extended with standard program logics if the automation
fails. Building on existing work, the soundness of the
analysis is proved with respect to an operational
semantics that is based on Markov decision processes.
The effectiveness of the technique is demonstrated with
a prototype implementation that is used to
automatically analyze 39 challenging probabilistic
programs and randomized algorithms. Experiments
indicate that the derived constant factors in the
bounds are very precise and even optimal for some
programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Wang:2018:PAF,
author = "Di Wang and Jan Hoffmann and Thomas Reps",
title = "{PMAF}: an algebraic framework for static analysis of
probabilistic programs",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "513--528",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192408",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Automatically establishing that a probabilistic
program satisfies some property $ \varphi $ is a
challenging problem. While a sampling-based
approach --- which involves running the program
repeatedly --- can suggest that $ \varphi $ holds, to
establish that the program satisfies $ \varphi $,
analysis techniques must be used. Despite recent
successes, probabilistic static analyses are still more
difficult to design and implement than their
deterministic counterparts. This paper presents a
framework, called PMAF, for designing, implementing,
and proving the correctness of static analyses of
probabilistic programs with challenging features such
as recursion, unstructured control-flow, divergence,
nondeterminism, and continuous distributions. PMAF
introduces pre-Markov algebras to factor out common
parts of different analyses. To perform interprocedural
analysis and to create procedure summaries, PMAF
extends ideas from non-probabilistic interprocedural
dataflow analysis to the probabilistic setting. One
novelty is that PMAF is based on a semantics formulated
in terms of a control-flow hyper-graph for each
procedure, rather than a standard control-flow graph.
To evaluate its effectiveness, PMAF has been used to
reformulate and implement existing intraprocedural
analyses for Bayesian inference and the Markov decision
problem, by creating corresponding interprocedural
analyses. Additionally, PMAF has been used to implement
a new interprocedural linear expectation-invariant
analysis. Experiments with benchmark programs for the
three analyses demonstrate that the approach is
practical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Acharya:2018:PAT,
author = "Aravind Acharya and Uday Bondhugula and Albert Cohen",
title = "Polyhedral auto-transformation with no integer linear
programming",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "529--542",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192401",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "State-of-the-art algorithms used in automatic
polyhedral transformation for parallelization and
locality optimization typically rely on Integer Linear
Programming (ILP). This poses a scalability issue when
scaling to tens or hundreds of statements, and may be
disconcerting in production compiler settings. In this
work, we consider relaxing integrality in the ILP
formulation of the Pluto algorithm, a popular algorithm
used to find good affine transformations. We show that
the rational solutions obtained from the relaxed LP
formulation can easily be scaled to valid integral ones
to obtain desired solutions, although with some
caveats. We first present formal results connecting the
solution of the relaxed LP to the original Pluto ILP.
We then show that there are difficulties in realizing
the above theoretical results in practice, and propose
an alternate approach to overcome those while still
leveraging linear programming. Our new approach obtains
dramatic compile-time speedups for a range of large
benchmarks. While achieving these compile-time
improvements, we show that the performance of the
transformed code is not sacrificed. Our approach to
automatic transformation provides a mean compilation
time improvement of 5.6$ \times $ over state-of-the-art
on relevant challenging benchmarks from the NAS PB,
SPEC CPU 2006, and PolyBench suites. We also came
across situations where prior frameworks failed to find
a transformation in a reasonable amount of time, while
our new approach did so instantaneously.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Moll:2018:PCF,
author = "Simon Moll and Sebastian Hack",
title = "Partial control-flow linearization",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "543--556",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192413",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "If-conversion is a fundamental technique for
vectorization. It accounts for the fact that in a SIMD
program, several targets of a branch might be executed
because of divergence. Especially for irregular
data-parallel workloads, it is crucial to avoid
if-converting non-divergent branches to increase SIMD
utilization. In this paper, we present partial
linearization, a simple and efficient if-conversion
algorithm that overcomes several limitations of
existing if-conversion techniques. In contrast to prior
work, it has provable guarantees on which non-divergent
branches are retained and will never duplicate code or
insert additional branches. We show how our algorithm
can be used in a classic loop vectorizer as well as to
implement data-parallel languages such as ISPC or
OpenCL. Furthermore, we implement prior vectorizer
optimizations on top of partial linearization in a more
general way. We evaluate the implementation of our
algorithm in LLVM on a range of irregular data
analytics kernels, a neutronics simulation benchmark
and NAB, a molecular dynamics benchmark from SPEC2017
on AVX2, AVX512, and ARM Advanced SIMD machines and
report speedups of up to 146\% over ICC, GCC, and Clang
O3.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Chen:2018:LAT,
author = "Dong Chen and Fangzhou Liu and Chen Ding and Sreepathi
Pai",
title = "Locality analysis through static parallel sampling",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "557--570",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192402",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Locality analysis is important since accessing memory
is much slower than computing. Compile-time locality
analysis can provide detailed program-level feedback
for compilers or runtime systems faster than
trace-based locality analysis. In this paper, we
describe a new approach to locality analysis based on
static parallel sampling. A compiler analyzes
loop-based code and generates sampler code which is run
to measure locality. Our approach can predict precise
cache line granularity miss ratio curves for complex
loops with non-linear array references and even
branches. The precision and overhead of static sampling
are evaluated using PolyBench and a bit-reversal loop.
Our results show that by randomly sampling 2\% of loop
iterations, a compiler can construct miss ratio curves
nearly as exact as those from trace-based analysis.
Sampling 0.5\% and 1\% of iterations achieves good
precision and efficiency, taking on average 0.6\% to
1\% of the tracing time, respectively. Our analysis
can also be
parallelized. The analysis may assist program
optimization techniques such as tiling, program
co-location, cache hint selection and help to analyze
write locality and parallel locality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Cusumano-Towner:2018:IIP,
author = "Marco Cusumano-Towner and Benjamin Bichsel and Timon
Gehr and Martin Vechev and Vikash K. Mansinghka",
title = "Incremental inference for probabilistic programs",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "571--585",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192399",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a novel approach for approximate sampling
in probabilistic programs based on incremental
inference. The key idea is to adapt the samples for a
program P into samples for a program Q, thereby
avoiding the expensive sampling computation for program
Q. To enable incremental inference in probabilistic
programming, our work: (i) introduces the concept of a
trace translator which adapts samples from P into
samples of Q, (ii) phrases this translation approach in
the context of sequential Monte Carlo (SMC), which
gives theoretical guarantees that the adapted samples
converge to the distribution induced by Q, and (iii)
shows how to obtain a concrete trace translator by
establishing a correspondence between the random
choices of the two probabilistic programs. We
implemented our approach in two different probabilistic
programming systems and showed that, compared to
methods that sample the program Q from scratch,
incremental inference can lead to orders of magnitude
increase in efficiency, depending on how closely
related P and Q are.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Gehr:2018:BPI,
author = "Timon Gehr and Sasa Misailovic and Petar Tsankov and
Laurent Vanbever and Pascal Wiesmann and Martin
Vechev",
title = "{Bayonet}: probabilistic inference for networks",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "586--602",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192400",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Network operators often need to ensure that important
probabilistic properties are met, such as that the
probability of network congestion is below a certain
threshold. Ensuring such properties is challenging and
requires both a suitable language for probabilistic
networks and an automated procedure for answering
probabilistic inference queries. We present Bayonet, a
novel approach that consists of: (i) a probabilistic
network programming language and (ii) a system that
performs probabilistic inference on Bayonet programs.
The key insight behind Bayonet is to phrase the problem
of probabilistic network reasoning as inference in
existing probabilistic languages. As a result, Bayonet
directly leverages existing probabilistic inference
systems and offers a flexible and expressive interface
to operators. We present a detailed evaluation of
Bayonet on common network scenarios, such as network
congestion, reliability of packet delivery, and others.
Our results indicate that Bayonet can express such
practical scenarios and answer queries for realistic
topology sizes (with up to 30 nodes).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Mansinghka:2018:PPP,
author = "Vikash K. Mansinghka and Ulrich Schaechtle and Shivam
Handa and Alexey Radul and Yutian Chen and Martin
Rinard",
title = "Probabilistic programming with programmable
inference",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "603--616",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192409",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We introduce inference metaprogramming for
probabilistic programming languages, including new
language constructs, a formalism, and the first
demonstration of effectiveness in practice. Instead of
relying on rigid black-box inference algorithms
hard-coded into the language implementation as in
previous probabilistic programming languages, inference
metaprogramming enables developers to (1) dynamically
decompose inference problems into subproblems, (2)
apply inference tactics to subproblems, (3) alternate
between incorporating new data and performing inference
over existing data, and (4) explore multiple execution
traces of the probabilistic program at once.
Implemented tactics include gradient-based
optimization, Markov chain Monte Carlo, variational
inference, and sequential Monte Carlo techniques.
Inference metaprogramming enables the concise
expression of probabilistic models and inference
algorithms across diverse fields, such as computer
vision, data science, and robotics, within a single
probabilistic programming language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Bohrer:2018:VVC,
author = "Brandon Bohrer and Yong Kiam Tan and Stefan Mitsch and
Magnus O. Myreen and Andr{\'e} Platzer",
title = "{VeriPhy}: verified controller executables from
verified cyber-physical system models",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "617--630",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192406",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present VeriPhy, a verified pipeline which
automatically transforms verified high-level models of
safety-critical cyber-physical systems (CPSs) in
differential dynamic logic (dL) to verified controller
executables. VeriPhy proves that all safety results are
preserved end-to-end as it bridges abstraction gaps,
including: (i) the gap between mathematical reals in
physical models and machine arithmetic in the
implementation, (ii) the gap between real physics and
its differential-equation models, and (iii) the gap
between nondeterministic controller models and machine
code. VeriPhy reduces CPS safety to the faithfulness of
the physical environment, which is checked at runtime
by synthesized, verified monitors. We use three provers
in this effort: KeYmaera X, HOL4, and Isabelle/HOL. To
minimize the trusted base, we cross-verify KeYmaera X in
Isabelle/HOL. We evaluate the resulting controller and
monitors on commodity robotics hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Kang:2018:CVC,
author = "Jeehoon Kang and Yoonseung Kim and Youngju Song and
Juneyoung Lee and Sanghoon Park and Mark Dongyeon Shin
and Yonghyun Kim and Sungkeun Cho and Joonwon Choi and
Chung-Kil Hur and Kwangkeun Yi",
title = "{Crellvm}: verified credible compilation for {LLVM}",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "631--645",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192377",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Production compilers such as GCC and LLVM are large
complex software systems, for which achieving a high
level of reliability is hard. Although testing is an
effective method for finding bugs, it alone cannot
guarantee a high level of reliability. To provide a
higher level of reliability, many approaches that
examine compilers' internal logics have been proposed.
However, none of them have been successfully applied to
major optimizations of production compilers. This paper
presents Crellvm: a verified credible compilation
framework for LLVM, which can be used as a systematic
way of providing a high level of reliability for major
optimizations in LLVM. Specifically, we augment an LLVM
optimizer to generate translation results together with
their correctness proofs, which can then be checked by
a proof checker formally verified in Coq. As case
studies, we applied our approach to two major
optimizations of LLVM: register promotion mem2reg and
global value numbering gvn, having found four new
miscompilation bugs (two in each).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Gu:2018:CCA,
author = "Ronghui Gu and Zhong Shao and Jieung Kim and Xiongnan
(Newman) Wu and J{\'e}r{\'e}mie Koenig and Vilhelm
Sj{\"o}berg and Hao Chen and David Costanzo and Tahina
Ramananandro",
title = "Certified concurrent abstraction layers",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "646--661",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192381",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Concurrent abstraction layers are ubiquitous in modern
computer systems because of the pervasiveness of
multithreaded programming and multicore hardware.
Abstraction layers are used to hide the implementation
details (e.g., fine-grained synchronization) and reduce
the complex dependencies among components at different
levels of abstraction. Despite their obvious
importance, concurrent abstraction layers have not been
treated formally. This severely limits the
applicability of layer-based techniques and makes it
difficult to scale verification across multiple
concurrent layers. In this paper, we present CCAL---a
fully mechanized programming toolkit developed under
the CertiKOS project---for specifying, composing,
compiling, and linking certified concurrent abstraction
layers. CCAL consists of three technical novelties: a
new game-theoretical, strategy-based compositional
semantic model for concurrency (and its associated
program verifiers), a set of formal linking theorems
for composing multithreaded and multicore concurrent
layers, and a new CompCertX compiler that supports
certified thread-safe compilation and linking. The CCAL
toolkit is implemented in Coq and supports layered
concurrent programming in both C and assembly. It has
been successfully applied to build a fully certified
concurrent OS kernel with fine-grained locking.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Taube:2018:MDD,
author = "Marcelo Taube and Giuliano Losa and Kenneth L.
McMillan and Oded Padon and Mooly Sagiv and Sharon
Shoham and James R. Wilcox and Doug Woos",
title = "Modularity for decidability of deductive verification
with applications to distributed systems",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "662--677",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192414",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Proof automation can substantially increase
productivity in formal verification of complex systems.
However, the unpredictability of automated provers in
handling quantified formulas presents a major hurdle to
usability of these tools. We propose to solve this
problem not by improving the provers, but by using a
modular proof methodology that allows us to produce
decidable verification conditions. Decidability greatly
improves predictability of proof automation, resulting
in a more practical verification approach. We apply
this methodology to develop verified implementations of
distributed protocols, demonstrating its
effectiveness.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Bastani:2018:ALP,
author = "Osbert Bastani and Rahul Sharma and Alex Aiken and
Percy Liang",
title = "Active learning of points-to specifications",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "678--692",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192383",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When analyzing programs, large libraries pose
significant challenges to static points-to analysis. A
popular solution is to have a human analyst provide
points-to specifications that summarize relevant
behaviors of library code, which can substantially
improve precision and handle missing code such as
native code. We propose Atlas, a tool that
automatically infers points-to specifications. Atlas
synthesizes unit tests that exercise the library code,
and then infers points-to specifications based on
observations from these executions. Atlas automatically
infers specifications for the Java standard library,
and produces better results for a client static
information flow analysis on a benchmark of 46 Android
apps compared to using existing handwritten
specifications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Shi:2018:PFP,
author = "Qingkai Shi and Xiao Xiao and Rongxin Wu and Jinguo
Zhou and Gang Fan and Charles Zhang",
title = "{Pinpoint}: fast and precise sparse value flow
analysis for million lines of code",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "693--706",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192418",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "When dealing with millions of lines of code, we still
cannot have the cake and eat it: sparse value-flow
analysis is powerful in checking source-sink problems,
but existing work cannot escape from the ``pointer
trap'' --- a precise points-to analysis limits its
scalability and an imprecise one seriously undermines
its precision. We present Pinpoint, a holistic approach
that decomposes the cost of high-precision points-to
analysis by precisely discovering local data dependence
and delaying the expensive inter-procedural analysis
through memoization. Such memoization enables the
on-demand slicing of only the necessary
inter-procedural data dependence and path feasibility
queries, which are then solved by a costly SMT solver.
Experiments show that Pinpoint can check programs such
as MySQL (around 2 million lines of code) within 1.5
hours. The overall false positive rate is also very low
(14.3\%--23.6\%). Pinpoint has discovered over forty
real bugs in mature and extensively checked open-source
systems. The implementation of Pinpoint and all
experimental results are freely available.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Zhu:2018:DDC,
author = "He Zhu and Stephen Magill and Suresh Jagannathan",
title = "A data-driven {CHC} solver",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "707--721",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192416",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a data-driven technique to solve
Constrained Horn Clauses (CHCs) that encode
verification conditions of programs containing
unconstrained loops and recursions. Our CHC solver
neither constrains the search space from which a
predicate's components are inferred (e.g., by
constraining the number of variables or the values of
coefficients used to specify an invariant), nor fixes
the shape of the predicate itself (e.g., by bounding
the number and kind of logical connectives). Instead,
our approach is based on a novel machine
learning-inspired tool chain that synthesizes CHC
solutions in terms of arbitrary Boolean combinations of
unrestricted atomic predicates. A CEGAR-based
verification loop inside the solver progressively
samples representative positive and negative data from
recursive CHCs, which is fed to the machine learning
tool chain. Our solver is implemented as an LLVM pass
in the SeaHorn verification framework and has been used
to successfully verify a large number of nontrivial and
challenging C programs from the literature and
well-known benchmark suites (e.g., SV-COMP).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Raghothaman:2018:UGP,
author = "Mukund Raghothaman and Sulekha Kulkarni and Kihong Heo
and Mayur Naik",
title = "User-guided program reasoning using {Bayesian}
inference",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "722--735",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192417",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Program analyses necessarily make approximations that
often lead them to report true alarms interspersed with
many false alarms. We propose a new approach to
leverage user feedback to guide program analyses
towards true alarms and away from false alarms. Our
approach associates each alarm with a confidence value
by performing Bayesian inference on a probabilistic
model derived from the analysis rules. In each
iteration, the user inspects the alarm with the highest
confidence and labels its ground truth, and the
approach recomputes the confidences of the remaining
alarms given this feedback. It thereby maximizes the
return on the effort by the user in inspecting each
alarm. We have implemented our approach in a tool named
Bingo for program analyses expressed in Datalog.
Experiments with real users and two sophisticated
analyses---a static datarace analysis for Java programs
and a static taint analysis for Android apps---show
significant improvements on a range of metrics,
including false alarm rates and number of bugs found.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Hong:2018:GCO,
author = "Changwan Hong and Aravind Sukumaran-Rajam and Jinsung
Kim and Prashant Singh Rawat and Sriram Krishnamoorthy
and Louis-No{\"e}l Pouchet and Fabrice Rastello and P.
Sadayappan",
title = "{GPU} code optimization using abstract kernel
emulation and sensitivity analysis",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "736--751",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192397",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In this paper, we develop an approach to GPU kernel
optimization by focusing on identification of
bottleneck resources and determining optimization
parameters that can alleviate the bottleneck.
Performance modeling for GPUs is done by abstract
kernel emulation along with latency/gap modeling of
resources. Sensitivity analysis with respect to
resource latency/gap parameters is used to predict the
bottleneck resource for a given kernel's execution. The
utility of the bottleneck analysis is demonstrated in
two contexts: (1) Coupling the new bottleneck-driven
optimization strategy with the OpenTuner auto-tuner:
experimental results on all kernels from the Rodinia
suite and GPU tensor contraction kernels from the
NWChem computational chemistry suite demonstrate
effectiveness. (2) Manual code optimization: two case
studies illustrate the use of the bottleneck analysis
to iteratively improve the performance of code from
state-of-the-art domain-specific code generators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Dathathri:2018:GCO,
author = "Roshan Dathathri and Gurbinder Gill and Loc Hoang and
Hoang-Vu Dang and Alex Brooks and Nikoli Dryden and
Marc Snir and Keshav Pingali",
title = "{Gluon}: a communication-optimizing substrate for
distributed heterogeneous graph analytics",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "752--768",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192404",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper introduces a new approach to building
distributed-memory graph analytics systems that
exploits heterogeneity in processor types (CPU and
GPU), partitioning policies, and programming models.
The key to this approach is Gluon, a
communication-optimizing substrate. Programmers write
applications in a shared-memory programming system of
their choice and interface these applications with
Gluon using a lightweight API. Gluon enables these
programs to run on heterogeneous clusters and optimizes
communication in a novel way by exploiting structural
and temporal invariants of graph partitioning policies.
To demonstrate Gluon's ability to support different
programming models, we interfaced Gluon with the Galois
and Ligra shared-memory graph analytics systems to
produce distributed-memory versions of these systems
named D-Galois and D-Ligra, respectively. To
demonstrate Gluon's ability to support heterogeneous
processors, we interfaced Gluon with IrGL, a
state-of-the-art single-GPU system for graph analytics,
to produce D-IrGL, the first multi-GPU
distributed-memory graph analytics system. Our
experiments were done on CPU clusters with up to 256
hosts and roughly 70,000 threads and on multi-GPU
clusters with up to 64 GPUs. The communication
optimizations in Gluon improve end-to-end application
execution time by $ \sim 2.6 \times $ on the average.
D-Galois and D-IrGL scale well and are faster than
Gemini, the state-of-the-art distributed CPU graph
analytics system, by factors of $ \sim 3.9 \times $ and
$ \sim 4.9 \times $, respectively, on the average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Acar:2018:HSP,
author = "Umut A. Acar and Arthur Chargu{\'e}raud and Adrien
Guatto and Mike Rainey and Filip Sieczkowski",
title = "Heartbeat scheduling: provable efficiency for nested
parallelism",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "769--782",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192391",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "A classic problem in parallel computing is to take a
high-level parallel program written, for example, in
nested-parallel style with fork-join constructs and run
it efficiently on a real machine. The problem could be
considered solved in theory, but not in practice,
because the overheads of creating and managing parallel
threads can overwhelm their benefits. Developing
efficient parallel codes therefore usually requires
extensive tuning and optimizations to reduce
parallelism just to a point where the overheads become
acceptable. In this paper, we present a scheduling
technique that delivers provably efficient results for
arbitrary nested-parallel programs, without the tuning
needed for controlling parallelism overheads. The basic
idea behind our technique is to create threads only at
a beat (which we refer to as the ``heartbeat'') and
make sure to do useful work in between. We specify our
heartbeat scheduler using an abstract-machine semantics
and provide mechanized proofs that the scheduler
guarantees low overheads for all nested parallel
programs. We present a prototype C++ implementation and
an evaluation that shows that Heartbeat competes well
with manually optimized Cilk Plus codes, without
requiring manual tuning.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Serrano:2018:GIP,
author = "Alejandro Serrano and Jurriaan Hage and Dimitrios
Vytiniotis and Simon Peyton Jones",
title = "Guarded impredicative polymorphism",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "783--796",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192389",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The design space for type systems that support
impredicative instantiation is extremely complicated.
One needs to strike a balance between expressiveness,
simplicity for both the end programmer and the type
system implementor, and how easily the system can be
integrated with other advanced type system concepts. In
this paper, we propose a new point in the design space,
which we call guarded impredicativity. Its key idea is
that impredicative instantiation in an application is
allowed for type variables that occur under a type
constructor. The resulting type system has a clean
declarative specification --- making it easy for
programmers to predict what will type and what will
not --- and allows for a smooth integration with GHC's
OutsideIn(X) constraint solving framework, while giving
up very little in terms of expressiveness compared to
systems like HMF, HML, FPH and MLF. We give a sound and
complete inference algorithm, and prove a principal
type property for our system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Bowman:2018:TCC,
author = "William J. Bowman and Amal Ahmed",
title = "Typed closure conversion for the calculus of
constructions",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "797--811",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192372",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dependently typed languages such as Coq are used to
specify and verify the full functional correctness of
source programs. Type-preserving compilation can be
used to preserve these specifications and proofs of
correctness through compilation into the generated
target-language programs. Unfortunately,
type-preserving compilation of dependent types is hard.
In essence, the problem is that dependent type systems
are designed around high-level compositional
abstractions to decide type checking, but compilation
interferes with the type-system rules for reasoning
about run-time terms. We develop a type-preserving
closure-conversion translation from the Calculus of
Constructions (CC) with strong dependent pairs ($
\Sigma $ types) --- a subset of the core language of
Coq --- to a type-safe, dependently typed compiler
intermediate language named CC-CC. The central
challenge in this work is how to translate the source
type-system rules for reasoning about functions into
target type-system rules for reasoning about closures.
To justify these rules, we prove soundness of CC-CC by
giving a model in CC. In addition to type preservation,
we prove correctness of separate compilation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Pombrio:2018:ITR,
author = "Justin Pombrio and Shriram Krishnamurthi",
title = "Inferring type rules for syntactic sugar",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "812--825",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192398",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Type systems and syntactic sugar are both valuable to
programmers, but sometimes at odds. While sugar is a
valuable mechanism for implementing realistic
languages, the expansion process obscures program
source structure. As a result, type errors can
reference terms the programmers did not write (and even
constructs they do not know), baffling them. The
language developer must also manually construct type
rules for the sugars, to give a typed account of the
surface language. We address these problems by
presenting a process for automatically reconstructing
type rules for the surface language using rules for the
core. We have implemented this theory, and show several
interesting case studies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "PLDI '18 proceedings.",
}
@Article{Byma:2018:DHP,
author = "Stuart Byma and James R. Larus",
title = "Detailed heap profiling",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "1--13",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210564",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Modern software systems heavily use the memory heap.
As systems grow more complex and compute with
increasing amounts of data, it can be difficult for
developers to understand how their programs actually
use the bytes that they allocate on the heap and
whether improvements are possible. To answer this
question of heap usage efficiency, we have built a new,
detailed heap profiler called Memoro. Memoro uses a
combination of static instrumentation, subroutine
interception, and runtime data collection to build a
clear picture of exactly when and where a program
performs heap allocation, and crucially how it actually
uses that memory. Memoro also introduces a new
visualization application that can distill collected
data into scores and visual cues that allow developers
to quickly pinpoint and eliminate inefficient heap
usage in their software. Our evaluation and experience
with several applications demonstrates that Memoro can
reduce heap usage and produce runtime improvements of
10\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
@Article{Tripp:2018:FHP,
author = "Charles Tripp and David Hyde and Benjamin
Grossman-Ponemon",
title = "{FRC}: a high-performance concurrent parallel deferred
reference counter for {C++}",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "14--28",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present FRC, a high-performance concurrent parallel
reference counter for unmanaged languages. It is well
known that high-performance garbage collectors help
developers write memory-safe, highly concurrent systems
and data structures. While C++, C, and other unmanaged
languages are used in high-performance applications,
adding concurrent memory management to these languages
has proven to be difficult. Unmanaged languages like
C++ use pointers instead of references, and have
uncooperative mutators which do not pause easily at a
safe point. Thus, scanning mutator stack root
references is challenging. FRC only defers decrements
and does not require mutator threads to pause during
collection. By deferring only decrements, FRC avoids
much of the synchronization overhead of a
fully-deferred implementation. Root references are
scanned without interrupting the mutator by publishing
these references to a thread-local array. FRC's
performance can exceed that of the C++ standard
library's shared pointer by orders of magnitude. FRC's
thread-safety guarantees and low synchronization
overhead enable significant throughput gains for
concurrently-readable shared data structures. We
describe the components of FRC, including our static
tree router data structure: a novel barrier which
improves the scalability of parallel collection
workers. FRC's performance is evaluated on several
concurrent data structures. We release FRC and our
tests as open-source code and expect FRC will be useful
for many concurrent C++ software systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
@Article{Brandt:2018:DGC,
author = "Steven R. Brandt and Hari Krishnan and Costas Busch
and Gokarna Sharma",
title = "Distributed garbage collection for general graphs",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "29--44",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210572",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose a scalable, cycle-collecting,
decentralized, reference counting garbage collector
with partial tracing. The algorithm is based on the
Brownbridge system but uses four different types of
references to label edges. Memory usage is
$ O(\log n) $ bits per node, where $ n $ is the number
of nodes in the graph. The algorithm assumes an
asynchronous network model with a reliable reordering
channel. It collects garbage in $ O(E_a) $ time, where
$ E_a $ is the number of edges in the induced subgraph.
The algorithm uses
termination detection to manage the distributed
computation, a unique identifier to break the symmetry
among multiple collectors, and a transaction-based
approach when multiple collectors conflict. Unlike
existing algorithms, ours is not centralized, does not
require barriers, does not require migration of nodes,
does not require back-pointers on every edge, and is
stable against concurrent mutation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
@Article{Ismail:2018:HSC,
author = "Mohamed Ismail and G. Edward Suh",
title = "Hardware-software co-optimization of memory management
in dynamic languages",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "45--58",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210566",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Dynamic programming languages are becoming
increasingly popular, yet often show a significant
performance slowdown compared to static languages. In
this paper, we study the performance overhead of
automatic memory management in dynamic languages. We
propose to improve the performance and memory bandwidth
usage of dynamic languages by co-optimizing garbage
collection overhead and cache performance for
newly-initialized and dead objects. Our study shows
that less frequent garbage collection results in a
large number of cache misses for initial stores to new
objects. We solve this problem by directly placing
uninitialized objects into on-chip caches without
off-chip memory accesses. We further optimize the
garbage collection by reducing unnecessary cache
pollution and write-backs through partial tracing that
invalidates dead objects between full garbage
collections. Experimental results on PyPy and V8 show
that less frequent garbage collection along with our
optimizations can significantly improve the performance
of dynamic languages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
@Article{Bruno:2018:DVM,
author = "Rodrigo Bruno and Paulo Ferreira and Ruslan Synytsky
and Tetiana Fydorenchyk and Jia Rao and Hang Huang and
Song Wu",
title = "Dynamic vertical memory scalability for {OpenJDK}
cloud applications",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "59--70",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210567",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The cloud is an increasingly popular platform to
deploy applications, as it lets cloud users provision
resources for their applications as needed. Furthermore,
cloud providers are now starting to offer a
``pay-as-you-use'' model in which users are only
charged for the resources that are really used instead
of paying for a statically sized instance. This new
model allows cloud users to save money, and cloud
providers to better utilize their hardware. However,
applications running on top of runtime environments
such as the Java Virtual Machine (JVM) cannot benefit
from this new model because they cannot dynamically
adapt the amount of used resources at runtime. In
particular, if an application needs more memory than
what was initially predicted at launch time, the JVM
will not allow the application to grow its memory
beyond the maximum value defined at launch time. In
addition, the JVM will hold memory that is no longer
being used by the application. This lack of dynamic
vertical scalability completely prevents the benefits
of the ``pay-as-you-use'' model, and forces users to
over-provision resources, and to lose money on unused
resources. We propose a new JVM heap sizing strategy
that allows the JVM to dynamically scale its memory
utilization according to the application's needs.
First, we provide a configurable limit on how much the
application can grow its memory. This limit is dynamic
and can be changed at runtime, as opposed to the
current static limit that can only be set at launch
time. Second, we adapt current Garbage Collection
policies that control how much the heap can grow and
shrink to better fit what is currently being used by
the application. The proposed solution is implemented
in the OpenJDK 9 HotSpot JVM, the new release of
OpenJDK. Changes were also introduced inside the
Parallel Scavenge collector and the Garbage First
collector (the new by-default collector in HotSpot).
Evaluation experiments using real workloads and data
show that, with negligible throughput and memory
overhead, dynamic vertical memory scalability can be
achieved. This allows users to save significant amounts
of money by not paying for unused resources, and cloud
providers to better utilize their physical machines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
@Article{Kaur:2018:OCM,
author = "Gurneet Kaur and Keval Vora and Sai Charan Koduru and
Rajiv Gupta",
title = "{OMR}: out-of-core {MapReduce} for large data sets",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "71--83",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210568",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract =     "While single-machine MapReduce systems can squeeze out
maximum performance from available multi-cores, they
are often limited by the size of main memory and can
thus only process small datasets. Our experience shows
that the state-of-the-art single-machine in-memory
MapReduce system Metis frequently experiences
out-of-memory crashes. Even though today's computers
are equipped with efficient secondary storage devices,
the frameworks do not utilize these devices mainly
because disk access latencies are much higher than
those for main memory. Therefore, the single-machine
setup of the Hadoop system performs much slower when it
is presented with the datasets which are larger than
the main memory. Moreover, such frameworks require
tuning many parameters, which puts an added burden on
the programmer. In this paper we present OMR, an
Out-of-core MapReduce system that not only successfully
handles datasets far larger than main memory but also
guarantees linear scaling with growing data sizes. OMR
actively minimizes the amount
of data to be read/written to/from disk via on-the-fly
aggregation and it uses block sequential disk
read/write operations whenever disk accesses become
necessary to avoid running out of memory. We
theoretically prove OMR's linear scalability and
empirically demonstrate it by processing datasets that
are up to 5$ \times $ larger than main memory. Our
experiments
show that in comparison to the standalone
single-machine setup of the Hadoop system, OMR delivers
far higher performance. Also in contrast to Metis, OMR
avoids out-of-memory crashes for large datasets as well
as delivers higher performance when datasets are small
enough to fit in main memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
@Article{Byrne:2018:MMR,
author = "Daniel Byrne and Nilufer Onder and Zhenlin Wang",
title = "{mPart}: miss-ratio curve guided partitioning in
key--value stores",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "84--95",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210571",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Web applications employ key-value stores to cache the
data that is most commonly accessed. The cache improves
a web application's performance by serving its requests
from memory and avoiding fetches from the backend
database. Since memory space is limited, maximizing
memory utilization is key to delivering the best
possible performance. This has led
to the use of multi-tenant systems, allowing
applications to share cache space. In addition,
application data access patterns change over time, so
the system should be adaptive in its memory allocation.
In this work, we address both multi-tenancy (where a
single cache is used for multiple applications) and
dynamic workloads (changing access patterns) using a
model that relates the cache size to the application
miss ratio, known as a miss ratio curve. Intuitively,
the larger the cache, the less likely the system will
need to fetch the data from the database. Our
efficient, online construction of the miss ratio curve
allows us to determine a near optimal memory allocation
given the available system memory, while adapting to
changing data access patterns. We show that our model
outperforms an existing state-of-the-art sharing model,
Memshare, in terms of overall cache hit ratio and does
so at a lower time cost. We show that for a typical
system, overall hit ratio is consistently 1 percentage
point greater and 99.9th percentile latency is reduced
by as much as 2.9\% under standard web application
workloads containing millions of requests.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
@Article{Brock:2018:PBS,
author = "Jacob Brock and Chen Ding and Rahman Lavaee and
Fangzhou Liu and Liang Yuan",
title = "Prediction and bounds on shared cache demand from
memory access interleaving",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "96--108",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210565",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Cache in multicore machines is often shared, and the
cache performance depends on how memory accesses
belonging to different programs interleave with one
another. The full range of performance possibilities
includes all possible interleavings, which are too
numerous to be studied by experiments for any mix of
non-trivial programs. This paper presents a theory to
characterize the effect of memory access interleaving
due to parallel execution of non-data-sharing programs.
The theory uses an established metric called the
footprint (which can be used to calculate miss ratios
in fully-associative LRU caches) to measure cache
demand, and considers the full range of interleaving
possibilities. The paper proves a lower bound for
footprints of interleaved traces, and then formulates
an upper bound in terms of the footprints of the
constituent traces. It also shows the correctness of
footprint composition used in a number of existing
techniques, and places precise bounds on its
accuracy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
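The shape of such bounds can be motivated with a back-of-the-envelope
argument (a sketch of the intuition only; the paper's definitions and
proofs are more careful): a window of length $w$ in the interleaved
trace contains $x$ accesses from one trace and $w - x$ from the other,
so, writing $fp_A$ and $fp_B$ for the footprints of the constituent
traces,

    fp(w) \ge \min_{0 \le x \le w} \bigl[ fp_A(x) + fp_B(w - x) \bigr],
    \qquad
    fp(w) \le fp_A(w) + fp_B(w).

The upper bound uses only that footprints are non-decreasing; the
composition used by existing techniques amounts to fixing $x$ at the
traces' relative access rates, $fp(w) \approx fp_A(\alpha w) +
fp_B((1 - \alpha) w)$.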
@Article{Horie:2018:BDQ,
author = "Michihiro Horie and Hiroshi Horii and Kazunori Ogata
and Tamiya Onodera",
title = "Balanced double queues for {GC} work-stealing on weak
memory models",
journal = j-SIGPLAN,
volume = "53",
number = "5",
pages = "109--119",
month = may,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299706.3210570",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Work-stealing is promising for scheduling and
balancing parallel workloads. It has a wide range of
applicability on middleware, libraries, and runtime
systems of programming languages. OpenJDK uses
work-stealing for copying garbage collection (GC) to
balance copying tasks among GC threads. Each thread has
its own queue to store tasks. When a thread has no task
in its queue, it acts as a thief and attempts to steal
a task from another thread's queue. However, this
work-stealing algorithm requires expensive memory
fences for pushing, popping, and stealing tasks,
especially on weak memory models such as POWER and ARM.
To address this problem, we propose a work-stealing
algorithm that uses double queues. Each GC thread has a
public queue that is accessible from other GC threads
and a private queue that is only accessible by itself.
Pushing and popping tasks in the private queue are free
from expensive memory fences. The most significant
point of our algorithm is its mechanism for maintaining
load balance across the double queues. We developed a
prototype implementation
for parallel GC in OpenJDK8 for ppc64le. We evaluated
our algorithm by using SPECjbb2015, SPECjvm2008,
TPC-DS, and Apache DayTrader.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "ISMM '18 proceedings.",
}
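The queue structure can be illustrated with a Haskell-flavored sketch
(an assumption-laden toy: OpenJDK's collector is C++ with carefully
placed fences, and the paper's balancing policy is more refined than
the half-split used here). The owner works on a private queue with no
synchronization and periodically publishes a share of its tasks to a
lock-protected public queue that thieves target.

    import Control.Concurrent.MVar
    import Data.IORef

    -- One GC worker's two queues: a private one touched only by
    -- its owner, and a public one that idle workers steal from.
    data Worker t = Worker
      { privQ :: IORef [t]   -- owner-only: cheap push/pop
      , pubQ  :: MVar [t]    -- shared: the stealing target
      }

    pushLocal :: Worker t -> t -> IO ()
    pushLocal w t = modifyIORef' (privQ w) (t :)

    popLocal :: Worker t -> IO (Maybe t)
    popLocal w = do
      ts <- readIORef (privQ w)
      case ts of
        []       -> return Nothing
        t : rest -> writeIORef (privQ w) rest >> return (Just t)

    -- Keep the system balanced: move half of the private tasks
    -- to the public queue so that other workers can steal them.
    publish :: Worker t -> IO ()
    publish w = do
      ts <- readIORef (privQ w)
      let (keep, share) = splitAt (length ts `div` 2) ts
      writeIORef (privQ w) keep
      modifyMVar_ (pubQ w) (return . (share ++))

    steal :: Worker t -> IO (Maybe t)
    steal victim = modifyMVar (pubQ victim) $ \ts ->
      case ts of
        []       -> return ([], Nothing)
        t : rest -> return (rest, Just t)

The point of the split is that pushLocal and popLocal touch
owner-private state only, so they need none of the fences that a single
shared deque imposes on every operation.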
@Article{Santos:2018:MBD,
author = "Rodrigo C. M. Santos and Guilherme F. Lima and
Francisco Sant'Anna and Roberto Ierusalimschy and
Edward H. Haeusler",
title = "A memory-bounded, deterministic and terminating
semantics for the synchronous programming language
{C{\'e}u}",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "1--18",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211334",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "C{\'e}u is a synchronous programming language for
embedded soft real-time systems. It focuses on
control-flow safety features, such as safe
shared-memory concurrency and safe abortion of lines of
execution, while enforcing memory-bounded,
deterministic, and terminating reactions to the
environment. In this work, we present a small-step
structural operational semantics for C{\'e}u and a
proof that reactions have the properties enumerated
above: that for a given arbitrary timeline of input
events, multiple executions of the same program always
react in bounded time and arrive at the same final
finite memory state.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Devine:2018:MCI,
author = "James Devine and Joe Finney and Peli de Halleux and
Micha{\l} Moskal and Thomas Ball and Steve Hodges",
title = "{MakeCode} and {CODAL}: intuitive and efficient
embedded systems programming for education",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "19--30",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211335",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Across the globe, it is now commonplace for educators
to engage in the making (design and development) of
embedded systems in the classroom to motivate and
excite their students. This new domain brings its own
set of unique requirements. Historically, embedded
systems development has required knowledge of low-level
programming languages, local installation of
compilation toolchains, device drivers, and
applications. For students and educators, these
requirements can introduce insurmountable barriers. We
present the motivation, requirements, implementation,
and evaluation of a new programming platform that
enables novice users to create software for embedded
systems. The platform has two major components: (1)
Microsoft MakeCode (www.makecode.com), a web app that
encapsulates an entire beginner IDE for
microcontrollers; and (2) CODAL, an efficient
component-oriented C++ runtime for microcontrollers. We
show how MakeCode and CODAL provide an accessible,
cross-platform, installation-free programming
experience for the BBC micro:bit and other embedded
devices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Taylor:2018:ADL,
author = "Ben Taylor and Vicent Sanz Marco and Willy Wolff and
Yehia Elkhatib and Zheng Wang",
title = "Adaptive deep learning model selection on embedded
systems",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "31--43",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211336",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The recent ground-breaking advances in deep neural
networks (DNNs) make them attractive for embedded
systems. However, it can take a long time for DNNs to
make an inference on resource-limited embedded devices.
Offloading the computation into the cloud is often
infeasible due to privacy concerns, high latency, or
the lack of connectivity. As such, there is a critical
need to find a way to effectively execute the DNN
models locally on the devices. This paper presents an
adaptive scheme to determine which DNN model to use for
a given input, by considering the desired accuracy and
inference time. Our approach employs machine learning
to develop a predictive model to quickly select a
pre-trained DNN to use for a given input and the
optimization constraint. We achieve this by first
training a predictive model off-line, and then using the
learnt model to select a DNN model to use for new,
unseen inputs. We apply our approach to the image
classification task and evaluate it on a Jetson TX2
embedded deep learning platform using the ImageNet
ILSVRC 2012 validation dataset. We consider a range of
influential DNN models. Experimental results show that
our approach achieves a 7.52\% improvement in inference
accuracy, and a 1.8x reduction in inference time over
the most-capable single DNN model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
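Once the premodel has produced a per-model accuracy estimate for the
current input, the selection policy itself is tiny. A minimal sketch
(the record fields and the fallback rule are assumptions for
illustration; the paper's premodel is itself learned offline):

    import Data.List (maximumBy, minimumBy)
    import Data.Ord (comparing)

    -- A candidate DNN with an offline time profile and the
    -- premodel's estimate that it handles this input correctly.
    data Model = Model
      { name     :: String
      , timeMs   :: Double
      , pCorrect :: Double
      }

    -- Fastest model predicted to clear the accuracy threshold;
    -- fall back to the most accurate model if none does.
    selectModel :: Double -> [Model] -> Model
    selectModel threshold models =
      case filter ((>= threshold) . pCorrect) models of
        []   -> maximumBy (comparing pCorrect) models
        good -> minimumBy (comparing timeMs) good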
@Article{Han:2018:ORS,
author = "Lei Han and Zhaoyan Shen and Zili Shao and Tao Li",
title = "Optimizing {RAID\slash SSD} controllers with lifetime
extension for flash-based {SSD} array",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "44--54",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211338",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Flash-based SSD RAID arrays are increasingly being
deployed in data centers. Compared with HDD arrays, SSD
arrays drastically enhance storage density and I/O
performance, and reduce power and rack space.
Nevertheless, SSDs suffer from aging issues. Though prior
studies have been conducted to address this
disadvantage, effective techniques of RAID/SSD
controllers are urgently needed to extend the lifetime
of SSD arrays. In this paper, we for the first time
apply approximate storage via the interplay of RAID and
SSD controllers to optimize the lifespan of SSD arrays.
Our basic idea is to reuse faulty blocks (those that contain
pages with uncorrectable errors) to store approximate
data (which can tolerate more errors). By relaxing the
integrity of flash blocks, we observed that the
endurance of NAND flash memory can be significantly
boosted, thereby providing huge potential to
significantly extend the lifetime of SSDs. Based on
this observation, we propose the use of an efficient
space management scheme for data allocation and FTL
strategies by coordinating the interplay of RAID and
SSD controllers to optimize the lifetime of SSD arrays.
We implemented a prototype, called FreeRAID, based on
an SSD array simulator. Our experiments show that we
can significantly increase the lifetime by up to 2.17$
\times $ compared with conventional SSD-based RAID
arrays.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Skelin:2018:CSA,
author = "Mladen Skelin and Marc Geilen",
title = "Compositionality in scenario-aware dataflow: a
rendezvous perspective",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "55--64",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211339",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Finite-state machine-based scenario-aware dataflow
(FSM-SADF) is a dynamic dataflow model of computation
that combines streaming data and finite-state control.
For the most part, it preserves the determinism of its
underlying synchronous dataflow (SDF) concurrency model
and only when necessary introduces the
non-deterministic variation in terms of scenarios that
are represented by SDF graphs. This puts FSM-SADF in a
sweet spot in the trade-off space between
expressiveness and analyzability. However, FSM-SADF
supports no notion of compositionality, which hampers
its usability in modeling and consequent analysis of
large systems. In this work we propose a compositional
semantics for FSM-SADF that overcomes this problem. We
base the semantics of the composition on standard
composition of processes with rendezvous communication
in the style of CCS or CSP at the control level and the
parallel, serial and feedback composition of SDF graphs
at the dataflow level. We evaluate the approach on a
case study from the multimedia domain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Stokes:2018:DAG,
author = "Michael Stokes and Ryan Baird and Zhaoxiang Jin and
David Whalley and Soner Onder",
title = "Decoupling address generation from loads and stores to
improve data access energy efficiency",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "65--75",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211340",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Level-one data cache (L1 DC) accesses impact energy
usage as they frequently occur and use significantly
more energy than register file accesses. A memory
access instruction consists of an address generation
operation calculating the location where the data item
resides in memory and the data access operation that
loads/stores a value from/to that location. We propose
to decouple these two operations into separate machine
instructions to reduce energy usage. By associating the
data translation lookaside buffer (DTLB) access and
level-one data cache (L1 DC) tag check with an address
generation instruction, only a single data array in a
set-associative L1 DC needs to be accessed during a
load instruction when the result of the tag check is
known at that point. In addition, many DTLB accesses
and L1 DC tag checks are avoided by memoizing the DTLB
way and L1 DC way with the register that holds the
memory address to be dereferenced. Finally, we are often
able to coalesce an ALU operation with a load or store
data access using our technique to reduce the number of
instructions executed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Egger:2018:VCG,
author = "Bernhard Egger and Eunjin Song and Hochan Lee and
Daeyoung Shin",
title = "Verification of coarse-grained reconfigurable arrays
through random test programs",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "76--88",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211342",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We propose and evaluate a framework to test the
functional correctness of coarse-grained reconfigurable
array (CGRA) processors for pre-silicon verification
and post-silicon validation. To reflect the
reconfigurable nature of CGRAs, an architectural model
of the system under test is built directly from the
hardware description files. A guided place-and-routing
algorithm is used to map operations and operands onto
the heterogeneous processing elements (PE). Test
coverage is maximized by favoring unexercised parts of
the architecture. Requiring no explicit knowledge about
the semantics of operations, the random test program
generator (RTPG) framework seamlessly supports custom
ISA extensions. The proposed framework is applied to
the Samsung Reconfigurable Processor, a
modulo-scheduled CGRA integrated in smartphones,
cameras, printers, and smart TVs. Experiments
demonstrate that the RTPG is versatile, efficient, and
quickly achieves a high coverage. In addition to
detecting all randomly inserted faults, the generated
test programs also exposed two previously unknown
faults in the architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Chang:2018:DNN,
author = "Andre Xian Ming Chang and Aliasger Zaidy and Lukasz
Burzawa and Eugenio Culurciello",
title = "Deep neural networks compiler for a trace-based
accelerator (short {WIP} paper)",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "89--93",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211333",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Deep Neural Networks (DNNs) are the algorithm of
choice for image processing applications. DNNs present
highly parallel workloads that lead to the emergence of
custom hardware accelerators. Deep Learning (DL) models
specialized in different tasks require programmable
custom hardware and a compiler/mapper to efficiently
translate different DNNs into an efficient dataflow in
the accelerator. The goal of this paper is to present a
compiler for running DNNs on Snowflake, which is a
programmable hardware accelerator that targets DNNs.
The compiler correctly generates instructions for
various DL models: AlexNet, VGG, ResNet and LightCNN9.
Snowflake, with a varying number of processing units,
was implemented on an FPGA to measure the compiler's and
Snowflake's performance properties when scaling up. The
system achieves 70 frames/s and 4.5 GB/s of off-chip
memory bandwidth for AlexNet without linear layers on
Xilinx's Zynq-SoC XC7Z045 FPGA.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{SantAnna:2018:TSL,
author = "Francisco Sant'Anna and Alexandre Sztajnberg and Ana
L{\'u}cia de Moura and Noemi Rodrigues",
title = "Transparent standby for low-power,
resource-constrained embedded systems: a programming
language-based approach (short {WIP} paper)",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "94--98",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211337",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Standby efficiency for connected devices is one of the
priorities of the G20's Energy Efficiency Action Plan.
We propose transparent programming language mechanisms
to enforce that applications remain in the deepest
standby modes for the longest periods of time. We
extend the programming language C{\'e}u with support
for interrupt service routines and with a simple power
management runtime. Based on these primitives, we also
provide device drivers that allow applications to take
advantage of standby automatically. Our approach relies
on the synchronous semantics of the language which
guarantees that reactions to the environment always
reach an idle state amenable to standby. In addition,
in order to lower the programming barrier of adoption,
we show that programs in C{\'e}u can keep a sequential
syntactic structure, even when applications require
non-trivial concurrent behavior.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Chimdyalwar:2018:SRP,
author = "Bharti Chimdyalwar and Priyanka Darke",
title = "Statically relating program properties for efficient
verification (short {WIP} paper)",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "99--103",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211341",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Efficient automatic verification of real world
embedded software with numerous properties is a
challenge. Existing techniques verify a sufficient
subset of properties by identifying implication
relations between their verification outcomes. We
believe this is expensive and propose a novel
complementary approach called grouping. Grouping does
not consider the verification outcomes but uses data
and control flow characteristics of the program to
create disjoint groups of properties verifiable one
group at a time. We present three grouping techniques, a
framework, and experiments over open source and
industrial applications to support our thesis. The
experiments show a large performance gain for a few
state-of-the-art tools. This led to the integration of
grouping into the verification process of an automotive
software manufacturer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Chadha:2018:JAS,
author = "Gaurav Chadha",
title = "{JSCore}: architectural support for accelerating
{JavaScript} execution (short {WIP} paper)",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "104--108",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211343",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "JavaScript has seen meteoric growth in popularity as
it has increasingly become the language of choice for
developers, both for front-end web development and
server code development through various JavaScript
frameworks and Node.js. Part of the reason for its wide
use is that it is a prototype-based language with
dynamic types, making it easy to learn and program in.
This flexibility and ease of programming comes at the
cost of performance. There are two sources of
significant slowdown. First, since the number and type
of properties of prototypes are dynamic, accessing a
property involves a slow dictionary lookup, as opposed
to it being present at a fixed offset from the base
address. Second, the dynamism in type of values
necessitates wrapping and unwrapping of values into
objects with a variety of checks, including on the type
of the value. To mitigate these performance problems, this
paper proposes JSCore, a core specialized for
JavaScript execution, that vastly reduces the
performance degradation due to the above two causes. It
uses a hardware lookup table to accelerate property
access, and extends the data path to store data types
with the data, nearly eliminating the second source of
slowdown. Combining the two, JSCore accelerates real
world JavaScript applications by 23\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Mehrotra:2018:OSR,
author = "Pavan Mehrotra and Sabar Dasgupta and Samantha
Robertson and Paul Nuyujukian",
title = "An open-source realtime computational platform (short
{WIP} paper)",
journal = j-SIGPLAN,
volume = "53",
number = "6",
pages = "109--112",
month = jun,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299710.3211344",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/gnu.bib;
https://www.math.utah.edu/pub/tex/bib/python.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Systems neuroscience studies involving in-vivo models
often require realtime data processing. In these
studies, many events must be monitored and processed
quickly, including behavior of the subject (e.g.,
movement of a limb) or features of neural data (e.g., a
neuron transmitting an action potential).
Unfortunately, most realtime platforms are proprietary,
require specific architectures, or are limited to
low-level programming languages. Here we present a
hardware-independent, open-source realtime computation
platform that supports high-level programming. The
resulting platform, LiCoRICE, can process on the order
of 10e10 bits/sec of network data at 1 ms ticks with 18.2
\micro s jitter. It connects to various inputs and
outputs (e.g., DIO, Ethernet, database logging, and
analog line in/out) and minimizes reliance on custom
device drivers by leveraging peripheral support via the
Linux kernel. Its modular architecture supports
model-based design for rapid prototyping with C and
Python/Cython and can perform numerical operations via
BLAS/LAPACK-optimized NumPy that is statically compiled
via Numba's pycc. LiCoRICE is not only suitable for
systems neuroscience research, but also for
applications requiring closed-loop realtime data
processing from robotics and control systems to
interactive applications and quantitative financial
trading.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "LCTES '18 proceedings.",
}
@Article{Mista:2018:BPQ,
author = "Agust{\'\i}n Mista and Alejandro Russo and John
Hughes",
title = "Branching processes for {QuickCheck} generators",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "1--13",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242747",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "In QuickCheck (or, more generally, random testing), it
is challenging to control random data generators'
distributions---especially when it comes to user-defined
algebraic data types (ADT). In this paper, we adapt
results from an area of mathematics known as branching
processes, and show how they help to analytically
predict (at compile-time) the expected number of
generated constructors, even in the presence of
mutually recursive or composite ADTs. Using our
probabilistic formulas, we design heuristics capable of
automatically adjusting probabilities in order to
synthesize generators whose distributions are aligned
with users' demands. We provide a Haskell
implementation of our mechanism in a tool called DRaGeN
and perform case studies with real-world applications.
When generating random values, our synthesized
QuickCheck generators show improvements in code
coverage when compared with those automatically derived
by state-of-the-art tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
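The flavor of the analysis is easy to see on the simplest recursive ADT
(a sketch; the paper's machinery covers mutually recursive and
composite types via a matrix of constructor means). Treat each
generated constructor as an individual in a Galton-Watson branching
process; the generator's expected output size then follows from the
mean number of offspring per constructor.

    import Test.QuickCheck

    data Tree = Leaf | Node Tree Tree deriving Show

    -- Choose Node with probability p = n / (n + m).
    genTree :: Int -> Int -> Gen Tree
    genTree n m = frequency
      [ (m, pure Leaf)
      , (n, Node <$> genTree n m <*> genTree n m) ]

    -- A Node spawns two children and a Leaf none, so the mean
    -- offspring per constructor is 2p and the expected total
    -- number of constructors is 1 / (1 - 2p); finite only for
    -- p < 1/2, and generation may not terminate at p >= 1/2.
    expectedSize :: Double -> Double
    expectedSize p = 1 / (1 - 2 * p)

Inverting such formulas to pick frequencies that hit a target expected
size at compile time is the knob the paper's heuristics turn.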
@Article{Breitner:2018:PCP,
author = "Joachim Breitner",
title = "A promise checked is a promise kept: inspection
testing",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "14--25",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242748",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Occasionally, developers need to ensure that the
compiler treats their code in a specific way that is
only visible by inspecting intermediate or final
compilation artifacts. This is particularly common with
carefully crafted compositional libraries, where
certain usage patterns are expected to trigger an
intricate sequence of compiler optimizations --- stream
fusion is a well-known example. The developer of such a
library has to manually inspect build artifacts and
check for the expected properties. Because this is too
tedious to do often, it will likely go unnoticed if the
property is broken by a change to the library code, its
dependencies or the compiler. The lack of automation
has led to released versions of such libraries breaking
their documented promises. This indicates that there is
an unrecognized need for a new testing paradigm,
inspection testing, where the programmer declaratively
describes non-functional properties of a compilation
artifact and the compiler checks these properties. We
define inspection testing abstractly, implement it in
the context of the Haskell compiler GHC and show that
it increases the quality of such libraries.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
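A minimal example in the style of the inspection-testing library that
accompanies the paper (a sketch; plugin and operator spellings follow
the library's documented usage for GHC 8.x):

    {-# LANGUAGE TemplateHaskell #-}
    {-# OPTIONS_GHC -O -fplugin Test.Inspection.Plugin #-}
    module FusionCheck where

    import Test.Inspection

    -- Two pipelines that should compile to identical Core once
    -- GHC's list-fusion rewrite rules have fired.
    lhs, rhs :: [Int] -> [Int]
    lhs = map (+ 1) . map (* 2)
    rhs = map ((+ 1) . (* 2))

    -- Checked during compilation: the build fails if the two
    -- definitions still differ after optimization.
    inspect $ 'lhs === 'rhs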
@Article{Handley:2018:ACT,
author = "Martin A. T. Handley and Graham Hutton",
title = "{AutoBench}: comparing the time performance of
{Haskell} programs",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "26--37",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242749",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Two fundamental goals in programming are correctness
(producing the right results) and efficiency (using as
few resources as possible). Property-based testing
tools such as QuickCheck provide a lightweight means to
check the correctness of Haskell programs, but what
about their efficiency? In this article, we show how
QuickCheck can be combined with the Criterion
benchmarking library to give a lightweight means to
compare the time performance of Haskell programs. We
present the design and implementation of the AutoBench
system, demonstrate its utility with a number of case
studies, and find that many QuickCheck correctness
properties are also efficiency improvements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
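The underlying combination is easy to reproduce by hand (a sketch of
the idea only; AutoBench adds input-size scaling, curve fitting, and
side-by-side comparison automatically): let QuickCheck conjure inputs
and Criterion time the function on them.

    import Criterion.Main (bench, defaultMain, nf)
    import Data.List (sort)
    import Test.QuickCheck (generate, vector)

    -- Time `sort` on random inputs of growing size; AutoBench
    -- automates this loop and fits runtime curves to the samples.
    main :: IO ()
    main = do
      inputs <- mapM (generate . vector) [1000, 2000, 4000]
      defaultMain
        [ bench ("sort/" ++ show (length xs)) (nf sort xs)
        | xs <- inputs :: [[Int]] ]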
@Article{Sun:2018:AMB,
author = "Marilyn Sun and Kathleen Fisher",
title = "{Autobahn 2.0}: minimizing bangs while maintaining
performance (system demonstration)",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "38--40",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3264734",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Lazy evaluation has many advantages, but it can cause
bad performance. Consequently, Haskell allows users to
force eager evaluation at certain program points by
inserting strictness annotations, known and written as
bangs (!). Unfortunately, manual bang placement is
difficult. Autobahn 1.0 uses a genetic algorithm to
infer bang annotations that improve performance.
However, Autobahn 1.0 often generates large numbers of
superfluous bangs, which is problematic because users
must inspect each such bang to determine whether it is
safe. We introduce Autobahn 2.0, which uses GHC
profiling information to reduce the number of
superfluous bangs. When evaluated on the NoFib
benchmark suite, Autobahn 2.0 reduced the number of
inferred bangs by 90.2\% on average, while only
degrading program performance by 15.7\% compared with
the performance produced by Autobahn 1.0. In a case
study on a garbage collection simulator, Autobahn 2.0
eliminated 81.8\% of the recommended bangs, with the
same 15.7\% optimization degradation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
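For readers unfamiliar with bangs, the kind of annotation Autobahn
infers looks like this hand-written example (not Autobahn output):
without the bangs, the accumulators build chains of thunks that can
exhaust memory on long lists.

    {-# LANGUAGE BangPatterns #-}

    -- The bangs force each partial sum and count as they are
    -- produced instead of deferring the whole computation.
    mean :: [Double] -> Double
    mean = go 0 0
      where
        go !acc !n []       = if n == 0 then 0 else acc / fromIntegral n
        go !acc !n (x : xs) = go (acc + x) (n + 1) xs

Because every bang changes evaluation order, a superfluous one is not
mere noise: it must be audited for safety, which is exactly the
reviewing cost Autobahn 2.0 tries to cut.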
@Article{Serrano:2018:GPA,
author = "Alejandro Serrano and Victor Cacciari Miraldo",
title = "Generic programming of all kinds",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "41--54",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242745",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Datatype-generic programming is a widely used
technique to define functions that work regularly over
a class of datatypes. Examples include deriving
serialization of data, equality or even functoriality.
The state of the art in generic programming still lacks
support for GADTs, multiple type variables, and some
other features. This paper exploits modern GHC
extensions, including {\tt TypeInType}, to handle an
arbitrary number of type variables, constraints, and
existentials. We also provide an Agda model of our
construction that does not require Russell's paradox,
proving the construction is consistent.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
@Article{Blondal:2018:DHT,
author = "Baldur Bl{\"o}ndal and Andres L{\"o}h and Ryan Scott",
title = "{Deriving Via}: or, how to turn hand-written instances
into an anti-pattern",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "55--67",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242746",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Haskell's deriving construct is a cheap and cheerful
way to quickly generate instances of type classes that
follow common patterns. But at present, there is only a
subset of such type class patterns that deriving
supports, and if a particular class lies outside of
this subset, then one cannot derive it at all, with no
alternative except for laboriously declaring the
instances by hand. To overcome this deficit, we
introduce Deriving Via, an extension to deriving that
enables programmers to compose instances from named
programming patterns, thereby turning deriving into a
high-level domain-specific language for defining
instances. Deriving Via leverages newtypes---an already
familiar tool of the Haskell trade---to declare
recurring patterns in a way that both feels natural and
allows a high degree of abstraction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
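A small taste of the extension as it shipped in GHC 8.6 (standard
usage rather than one of the paper's own examples): reuse the
Semigroup and Monoid instances of Sum Int for a domain newtype instead
of writing them by hand.

    {-# LANGUAGE DerivingVia #-}

    import Data.Monoid (Sum (..))

    -- The instances are coerced through the named pattern
    -- `Sum Int`, so (<>) adds scores and mempty is Score 0.
    newtype Score = Score Int
      deriving (Semigroup, Monoid) via Sum Int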
@Article{Martinez:2018:ITR,
author = "Guido Mart{\'\i}nez and Mauro Jaskelioff and Guido {De
Luca}",
title = "Improving typeclass relations by being open",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "68--80",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242751",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Mathematical concepts such as monads, functors,
monoids, and semigroups are expressed in Haskell as
typeclasses. Therefore, in order to exploit relations
such as ``every monad is a functor'', and ``every
monoid is a semigroup'', we need to be able to also
express relations between typeclasses. Currently, the
only way to do so is using superclasses. However,
superclasses can be problematic due to their closed
nature. Adding a superclass implies modifying the
subclass' definition, which is either impossible if one
does not own such code, or painful as it requires
cascading changes and the introduction of boilerplate
throughout the codebase. In this article, we introduce
class morphisms, a way to relate classes in an open
fashion, without changing class definitions. We show
how class morphisms improve the expressivity,
conciseness, and maintainability of code. Further, we
show how to implement them while maintaining canonicity
and coherence, two key properties of the Haskell type
system. Extending a typechecker with class morphisms
amounts to adding an elaboration phase and is an
unintrusive change. We back this claim with a prototype
extension of GHC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
@Article{Winant:2018:CED,
author = "Thomas Winant and Dominique Devriese",
title = "Coherent explicit dictionary application for
{Haskell}",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "81--93",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242752",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Type classes are one of Haskell's most popular
features and extend its type system with ad-hoc
polymorphism. Since their conception, there have been useful
features that could not be offered because of the
desire to offer two correctness properties: coherence
and global uniqueness of instances. Coherence
essentially guarantees that program semantics are
independent from type-checker internals. Global
uniqueness of instances is relied upon by libraries for
enforcing, for example, that a single order relation is
used for all manipulations of an ordered binary tree.
The features that could not be offered include explicit
dictionary application and local instances, which would
be highly useful in practice. In this paper, we propose
a new design for offering explicit dictionary
application, without compromising coherence and global
uniqueness. We introduce a novel criterion based on
GHC's type argument roles to decide when a dictionary
application is safe with respect to global uniqueness
of instances. We preserve coherence by detecting
potential sources of incoherence, and prove it
formally. Moreover, our solution makes it possible to
use local dictionaries. In addition to developing our
ideas formally, we have implemented a working prototype
in GHC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
@Article{Eisenberg:2018:TVP,
author = "Richard A. Eisenberg and Joachim Breitner and Simon
Peyton Jones",
title = "Type variables in patterns",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "94--105",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242753",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "For many years, GHC has implemented an extension to
Haskell that allows type variables to be bound in type
signatures and patterns, and to scope over terms. This
extension was never properly specified. We rectify that
oversight here. With the formal specification in hand,
the otherwise-labyrinthine path toward a design for
binding type variables in patterns becomes blindingly
clear. We thus extend ScopedTypeVariables to bind type
variables explicitly, consigning the Proxy workaround to
the dustbin of history.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
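The pre-existing form of the feature that the paper finally specifies
is visible in a pattern type signature (a minimal sketch): the variable
bound in the pattern scopes over the rest of the definition.

    {-# LANGUAGE ScopedTypeVariables #-}

    -- The pattern signature binds `a`, which then scopes over
    -- the where clause, letting the local signature reuse it.
    duplicate :: [b] -> [b]
    duplicate (xs :: [a]) = ys ++ ys
      where
        ys :: [a]
        ys = xs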
@Article{Otwani:2018:TPY,
author = "Divesh Otwani and Richard A. Eisenberg",
title = "The {Thoralf} plugin: for your fancy type needs",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "106--118",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242754",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Many fancy types (e.g., generalized algebraic data
types, type families) require a type checker plugin.
These fancy types have a type index (e.g., type level
natural numbers) with an equality relation that is
difficult or impossible to represent using GHC's
built-in type equality. The most practical way to
represent these equality relations is through a plugin
that asserts equality constraints. However, such
plugins are difficult to write and reason about. In
this paper, we (1) present a formal theory of reasoning
about the correctness of type checker plugins for type
indices, and, (2) apply this theory in creating
Thoralf, a generic and extensible plugin for type
indices that translates GHC constraint problems to
queries to an external SMT solver. By ``generic and
extensible'', we mean the restrictions on extending
Thoralf are slight, and, if some type index could be
encoded as an SMT sort, then a programmer could extend
Thoralf by providing this encoding function.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
@Article{Noonan:2018:GDP,
author = "Matt Noonan",
title = "Ghosts of departed proofs (functional pearl)",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "119--131",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242755",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Library authors often are faced with a design choice:
should a function with preconditions be implemented as
a partial function, or by returning a failure condition
on incorrect use? Neither option is ideal. Partial
functions lead to frustrating run-time errors. Failure
conditions must be checked at the use-site, placing an
unfair tax on the users who have ensured that the
function's preconditions were correctly met. In this
paper, we introduce an API design concept called
``ghosts of departed proofs'' based on the following
observation: sophisticated preconditions can be encoded
in Haskell's type system with no run-time overhead, by
using proofs that inhabit phantom type parameters
attached to newtype wrappers. The user expresses
correctness arguments by constructing proofs to inhabit
these phantom types. Critically, this technique allows
the library user to decide when and how to validate
that the API's preconditions are met. The ``ghosts of
departed proofs'' approach to API design can achieve
many of the benefits of dependent types and refinement
types, yet only requires some minor and well-understood
extensions to Haskell 2010. We demonstrate the utility
of this approach through a series of case studies,
showing how to enforce novel invariants for lists,
maps, graphs, shared memory regions, and more.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
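The core trick fits in a dozen lines (a sketch in the spirit of the
paper's sortBy/mergeBy example; hiding the constructors behind a module
boundary is elided here): a phantom parameter names the comparator a
list was sorted with, so a merge can demand that both inputs carry the
same name, at zero run-time cost.

    {-# LANGUAGE RankNTypes #-}

    import Data.List (sortBy)

    -- In a real library only the types are exported, never the
    -- constructors, so these values cannot be forged.
    newtype Comparator comp a = Comparator (a -> a -> Ordering)
    newtype SortedBy comp a = SortedBy [a]

    -- Rank-2 quantification mints a fresh, unforgeable name for
    -- each comparator handed to the continuation.
    withComparator :: (a -> a -> Ordering)
                   -> (forall comp. Comparator comp a -> r) -> r
    withComparator f k = k (Comparator f)

    sortedBy :: Comparator comp a -> [a] -> SortedBy comp a
    sortedBy (Comparator f) = SortedBy . sortBy f

    -- Both arguments are sorted by the *same* comparator, by
    -- construction; no run-time check, no partiality.
    mergeBy :: Comparator comp a
            -> SortedBy comp a -> SortedBy comp a -> SortedBy comp a
    mergeBy (Comparator f) (SortedBy xs) (SortedBy ys) =
      SortedBy (go xs ys)
      where
        go [] bs = bs
        go as [] = as
        go (a : as) (b : bs)
          | f a b == GT = b : go (a : as) bs
          | otherwise   = a : go as (b : bs)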
@Article{Vazou:2018:TPA,
author = "Niki Vazou and Joachim Breitner and Rose Kunkel and
David {Van Horn} and Graham Hutton",
title = "Theorem proving for all: equational reasoning in
liquid {Haskell} (functional pearl)",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "132--144",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242756",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Equational reasoning is one of the key features of
pure functional languages such as Haskell. To date,
however, such reasoning always took place externally to
Haskell, either manually on paper, or mechanised in a
theorem prover. This article shows how equational
reasoning can be performed directly and seamlessly
within Haskell itself, and be checked using Liquid
Haskell. In particular, language learners --- to whom
external theorem provers are out of reach --- can
benefit from having their proofs mechanically checked.
Concretely, we show how the equational proofs and
derivations from Graham's textbook can be recast as
proofs in Haskell (spoiler: they look essentially the
same).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
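What such an in-language proof looks like (a sketch; pragma and
operator spellings follow Liquid Haskell's ProofCombinators and have
varied a little across releases): the right identity of list append,
written as an ordinary Haskell function whose equational steps Liquid
Haskell checks.

    {-@ LIQUID "--reflection" @-}
    module AppendRightId where

    import Language.Haskell.Liquid.ProofCombinators

    {-@ reflect app @-}
    app :: [a] -> [a] -> [a]
    app []       ys = ys
    app (x : xs) ys = x : app xs ys

    {-@ rightId :: xs:[a] -> { app xs [] == xs } @-}
    rightId :: [a] -> Proof
    rightId []       = app [] [] === [] *** QED
    rightId (x : xs) =   app (x : xs) []
                     === x : app xs []
                       ? rightId xs
                     === x : xs
                     *** QED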
@Article{Barenz:2018:RFT,
author = "Manuel B{\"a}renz and Ivan Perez",
title = "{Rhine}: {FRP} with type-level clocks",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "145--157",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242757",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Processing data at different rates is generally a hard
problem in reactive programming. Buffering problems,
lags, and concurrency issues often occur. Many of these
problems are clock errors, where data at different
rates is combined incorrectly. Techniques to avoid
clock errors, such as type-level clocks and
deterministic scheduling, exist in the field of
synchronous programming, but are not implemented in
general-purpose languages like Haskell. Rhine is a
clock-safe library for synchronous and asynchronous
Functional Reactive Programming (FRP). It separates the
aspects of clocking, scheduling and resampling from
each other, and ensures clock-safety at the type level.
Concurrent communication is encapsulated safely.
Diverse reactive subsystems can be combined in a
coherent, declarative data-flow framework, while
correct interoperability of data at different rates is
guaranteed by type-level clocks. This provides a
general-purpose framework that simplifies multi-rate
FRP systems and can be used for game development, media
applications, GUIs and embedded systems, through a
flexible API with many reusable components.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
@Article{Matsuda:2018:EIL,
author = "Kazutaka Matsuda and Meng Wang",
title = "Embedding invertible languages with binders: a case of
the {FliPpr} language",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "158--171",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242758",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper describes a new embedding technique of
invertible programming languages, through the case of
the FliPpr language. Embedded languages have the
advantage of inheriting host languages' features and
support, and one of the influential methods of
embedding is the tagless-final style, which enables a
high level of programmability and extensibility.
However, it is not straightforward to apply the method
to the family of invertible/reversible/bidirectional
languages, due to the different ways functions in such
domains are represented. We consider FliPpr, an
invertible pretty-printing system, as a representative
of such languages, and show that Atkey et al.'s
unembedding technique can be used to address the
problem. Together with a reformulation of FliPpr, our
embedding achieves a high level of interoperability
with the host language Haskell, which is not found in
any other invertible languages. We implement the idea
and demonstrate the benefits of the approach with
examples.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
@Article{Han:2018:HPM,
author = "Dong Han and Tao He",
title = "A high-performance multicore {IO} manager based on
{\tt libuv} (experience report)",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "172--178",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242759",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present a high performance multicore I/O manager
based on libuv for Glasgow Haskell Compiler (GHC). The
new I/O manager is packaged as an ordinary Haskell
package rather than baked into GHC's runtime system (GHC
RTS), yet takes advantage of GHC RTS's comprehensive
concurrent support, such as lightweight threads and
safe/unsafe FFI options. The new I/O manager's
performance is comparable with the existing
implementation, with greater stability under high load.
It can also be
easily extended to support all of libuv's
callback-based APIs, allowing us to write a complete
high performance I/O toolkit without spending time on
dealing with OS differences or low-level I/O system
calls.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
@Article{Gissurarson:2018:SVH,
author = "Matth{\'\i}as P{\'a}ll Gissurarson",
title = "Suggesting valid hole fits for typed-holes (experience
report)",
journal = j-SIGPLAN,
volume = "53",
number = "7",
pages = "179--185",
month = jul,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3299711.3242760",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Type systems allow programmers to communicate a
partial specification of their program to the compiler
using types, which can then be used to check that the
implementation matches the specification. But can the
types be used to aid programmers during development? In
this experience report I describe the design and
implementation of my lightweight and practical
extension to the typed-holes of GHC that improves user
experience by adding a list of valid hole fits and
refinement hole fits to the error message of
typed-holes. By leveraging the type checker, these fits
are selected from identifiers in scope such that if the
hole is substituted with a valid hole fit, the
resulting expression is guaranteed to type check.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
remark = "Haskell '18 proceedings.",
}
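The feature is easier to show than to describe (a sketch; the exact
suggestions depend on what is in scope): compiling the module below
with GHC 8.6 or later reports the hole's type [Int] -> Int together
with valid hole fits such as sum, product, head, last, maximum, and
minimum.

    module Holes where

    -- Deliberately unfinished: the underscore is a typed hole,
    -- so this module is meant to fail compilation with an
    -- informative error that lists valid hole fits.
    total :: [Int] -> Int
    total = _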
@Article{Wilson:2018:BGT,
author = "Preston Tunnell Wilson and Ben Greenman and Justin
Pombrio and Shriram Krishnamurthi",
title = "The behavior of gradual types: a user study",
journal = j-SIGPLAN,
volume = "53",
number = "8",
pages = "1--12",
month = oct,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393673.3276947",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276947",
abstract = "There are several different gradual typing semantics,
reflecting different trade-offs between performance and
type soundness guarantees. Notably absent, however, are
any data on which of these semantics developers
actually prefer. We begin to rectify \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Bodin:2018:TMF,
author = "Martin Bodin and Tom{\'a}s Diaz and {\'E}ric Tanter",
title = "A trustworthy mechanized formalization of {R}",
journal = j-SIGPLAN,
volume = "53",
number = "8",
pages = "13--24",
month = oct,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393673.3276946",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276946",
abstract = "The R programming language is very popular for
developing statistical software and data analysis,
thanks to rich libraries, concise and expressive
syntax, and support for interactive programming. Yet,
the semantics of R is fairly complex, contains many
subtle corner cases, and is not formally specified.
This makes it difficult to reason about R programs. In
this work, we develop a big-step operational semantics
for R in the form of an interpreter written in the Coq
proof assistant. We ensure the trustworthiness of the
formalization by introducing a monadic encoding that
allows the Coq interpreter, CoqR, to be in direct
visual correspondence with the reference R interpreter,
GNU R. Additionally, we provide a testing framework
that supports systematic comparison of CoqR and GNU R.
In its current state, CoqR covers the nucleus of the R
language as well as numerous additional features,
making it pass a significant number of realistic test
cases from the GNU R and FastR projects. To exercise
the formal specification, we prove in Coq the
preservation of memory invariants in selected parts of
the interpreter. This work is an important first step
towards a robust environment for formal verification of
R programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Marron:2018:LLC,
author = "Mark Marron",
title = "Log++ logging for a cloud-native world",
journal = j-SIGPLAN,
volume = "53",
number = "8",
pages = "25--36",
month = oct,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393673.3276952",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276952",
abstract = "Logging is a fundamental part of the software
development and deployment lifecycle but logging
support is often provided as an afterthought via
limited library APIs or third-party modules. Given the
critical nature of logging in modern cloud, mobile,
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chen:2018:HBA,
author = "Hanfeng Chen and Joseph Vinish D'Silva and Hongji Chen
and Bettina Kemme and Laurie Hendren",
title = "{HorseIR}: bringing array programming languages
together with database query processing",
journal = j-SIGPLAN,
volume = "53",
number = "8",
pages = "37--49",
month = oct,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393673.3276951",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276951",
abstract = "Relational database management systems (RDBMS) are
operationally similar to a dynamic language processor.
They take SQL queries as input, dynamically generate an
optimized execution plan, and then execute it. In
recent decades, the emergence of in- \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Serrano:2018:JAC,
author = "Manuel Serrano",
title = "{JavaScript AOT} compilation",
journal = j-SIGPLAN,
volume = "53",
number = "8",
pages = "50--63",
month = oct,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393673.3276950",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276950",
abstract = "Static compilation, a.k.a., ahead-of-time (AOT)
compilation, is an alternative approach to JIT
compilation that can combine good speed and lightweight
memory footprint, and that can accommodate read-only
memory constraints that are imposed by some devices and
some operating systems. Unfortunately the highly
dynamic nature of JavaScript makes it hard to compile
statically, and all existing AOT compilers have given
up on either good performance or full language support.
We have designed and implemented an AOT compiler that
aims at satisfying both. It supports full unrestricted
ECMAScript 5.1 plus many ECMAScript 2017 features and
the majority of benchmarks are within 50\% of the
performance of one of the fastest JIT compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Seginer:2018:QBO,
author = "Yoav Seginer and Theo Vosse and Gil Harari and Uri
Kolodny",
title = "Query-based object-oriented programming: a declarative
web of objects",
journal = j-SIGPLAN,
volume = "53",
number = "8",
pages = "64--75",
month = oct,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393673.3276949",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276949",
abstract = "We present a declarative, object-oriented language in
which queries play a central role. Queries are used not
only to access data, but also to refer to the
application's object members and as a means of program
control. The language is fully declarative, \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Chari:2018:SCD,
author = "Guido Chari and Javier Pim{\'a}s and Jan Vitek and
Olivier Fl{\"u}ckiger",
title = "Self-contained development environments",
journal = j-SIGPLAN,
volume = "53",
number = "8",
pages = "76--87",
month = oct,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393673.3276948",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276948",
abstract = "Operating systems are traditionally implemented in
low-level, performance-oriented programming languages.
These languages typically rely on minimal runtime
support and provide unfettered access to the underlying
hardware. Tradition has benefits: \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Herrera:2018:NCW,
author = "David Herrera and Hanfeng Chen and Erick Lavoie and
Laurie Hendren",
title = "Numerical computing on the web: benchmarking for the
future",
journal = j-SIGPLAN,
volume = "53",
number = "8",
pages = "88--100",
month = oct,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393673.3276968",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393673.3276968",
abstract = "Recent advances in execution environments for
JavaScript and WebAssembly that run on a broad range of
devices, from workstations and mobile phones to IoT
devices, provide new opportunities for portable and
web-based numerical computing. Indeed, numerous
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Smeltzer:2018:DSL,
author = "Karl Smeltzer and Martin Erwig",
title = "A domain-specific language for exploratory data
visualization",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "1--13",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278138",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278138",
abstract = "With an ever-growing amount of collected data, the
importance of visualization as an analysis component is
growing in concert. The creation of good visualizations
often doesn't happen in one step but is rather an
iterative and exploratory process. \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Stucki:2018:PUM,
author = "Nicolas Stucki and Aggelos Biboudis and Martin
Odersky",
title = "A practical unification of multi-stage programming and
macros",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "14--27",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278139",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278139",
abstract = "Program generation is indispensable. We propose a
novel unification of two existing metaprogramming
techniques: multi-stage programming and hygienic
generative macros. The former supports runtime code
generation and execution in a type-safe manner while
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Hatch:2018:RRI,
author = "William Gallard Hatch and Matthew Flatt",
title = "{Rash}: from reckless interactions to reliable
programs",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "28--39",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278129",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278129",
abstract = "Command languages like the Bourne Shell provide a
terse syntax for exploratory programming and system
interaction. Shell users can begin to write programs
that automate their tasks by simply copying their
interactions verbatim into a script file. \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Soares:2018:EFI,
author = "Larissa Rocha Soares and Jens Meinicke and Sarah Nadi
and Christian K{\"a}stner and Eduardo Santana de
Almeida",
title = "Exploring feature interactions without specifications:
a controlled experiment",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "40--52",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278127",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278127",
abstract = "In highly configurable systems, features may interact
unexpectedly and produce faulty behavior. Those faults
are not easily identified from the analysis of each
feature separately, especially when feature
specifications are missing. We propose \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Khalaj:2018:IOD,
author = "Ebrahim Khalaj and Marwan Abi-Antoun",
title = "Inferring ownership domains from refinements",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "53--65",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278128",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278128",
abstract = "Ownership type qualifiers clarify aliasing invariants
that cannot be directly expressed in mainstream
programming languages. Adding qualifiers to code,
however, often involves significant overhead and
difficult interaction. We propose an analysis to
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Volanschi:2018:ISC,
author = "Nic Volanschi and Bernard Serpette and Charles
Consel",
title = "Implementing a semi-causal domain-specific language
for context detection over binary sensors",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "66--78",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278134",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278134",
abstract = "In spite of the fact that many sensors in use today
are binary (i.e. produce only values of 0 and 1), and
that useful context-aware applications are built
exclusively on top of them, there is currently no
development approach specifically targeted to
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Susungi:2018:MPC,
author = "Adilla Susungi and Norman A. Rink and Albert Cohen and
Jeronimo Castrillon and Claude Tadonki",
title = "Meta-programming for cross-domain tensor
optimizations",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "79--92",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278131",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278131",
abstract = "Many modern application domains crucially rely on
tensor operations. The optimization of programs that
operate on tensors poses difficulties that are not
adequately addressed by existing languages and tools.
Frameworks such as TensorFlow offer good \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Peldszus:2018:MBS,
author = "Sven Peldszus and Daniel Str{\"u}ber and Jan
J{\"u}rjens",
title = "Model-based security analysis of feature-oriented
software product lines",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "93--106",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278126",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278126",
abstract = "Today's software systems are too complex to ensure
security after the fact --- security has to be built
into systems by design. To this end, model-based
techniques such as UMLsec support the design-time
specification and analysis of security requirements
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Christophe:2018:ODA,
author = "Laurent Christophe and Coen {De Roover} and Elisa
Gonzalez Boix and Wolfgang {De Meuter}",
title = "Orchestrating dynamic analyses of distributed
processes for full-stack {JavaScript} programs",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "107--118",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278135",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278135",
abstract = "Dynamic analyses are commonly implemented by
instrumenting the program under analysis. Examples of
such analyses for JavaScript range from checkers of
user-defined invariants to concolic testers. For a
full-stack JavaScript program, these analyses
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Ruland:2018:MES,
author = "Sebastian Ruland and Lars Luthmann and Johannes
B{\"u}rdek and Sascha Lity and Thomas Th{\"u}m and
Malte Lochau and M{\'a}rcio Ribeiro",
title = "Measuring effectiveness of sample-based product-line
testing",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "119--133",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278130",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278130",
abstract = "Recent research on quality assurance (QA) of
configurable software systems (e.g., software product
lines) proposes different analysis strategies to cope
with the inherent complexity caused by the well-known
combinatorial-explosion problem. Those \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Zhang:2018:PMO,
author = "Weixin Zhang and Bruno C. d. S. Oliveira",
title = "Pattern matching in an open world",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "134--146",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278124",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278124",
abstract = "Pattern matching is a pervasive and useful feature in
functional programming. There have been many attempts
to bring similar notions to Object-Oriented Programming
(OOP) in the past. However, a key challenge in OOP is
how pattern matching can coexist \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Al-Sibahi:2018:VHL,
author = "Ahmad Salim Al-Sibahi and Thomas P. Jensen and
Aleksandar S. Dimovski and Andrzej Wasowski",
title = "Verification of high-level transformations with
inductive refinement types",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "147--160",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278125",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278125",
abstract = "High-level transformation languages like Rascal
include expressive features for manipulating large
abstract syntax trees: first-class traversals,
expressive pattern matching, backtracking and
generalized iterators. We present the design and
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Cunha:2018:ESS,
author = "J{\'a}come Cunha and Mihai Dan and Martin Erwig and
Danila Fedorin and Alex Grejuc",
title = "Explaining spreadsheets with spreadsheets (short
paper)",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "161--167",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278136",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278136",
abstract = "Based on the concept of explanation sheets, we present
an approach to make spreadsheets easier to understand
and thus easier to use and maintain. We identify the
notion of explanation soundness and show that
explanation sheets which conform to simple \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{vanBinsbergen:2018:FHF,
author = "L. Thomas van Binsbergen",
title = "Funcons for {HGMP}: the fundamental constructs of
homogeneous generative meta-programming (short paper)",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "168--174",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278132",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278132",
abstract = "The PLanCompS project proposes a component-based
approach to programming-language development in which
fundamental constructs (funcons) are reused across
language definitions. Homogeneous Generative
Meta-Programming (HGMP) enables writing programs that
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Liu:2018:RTA,
author = "Yin Liu and Kijin An and Eli Tilevich",
title = "{RT-trust}: automated refactoring for trusted
execution under real-time constraints",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "175--187",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278137",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278137",
abstract = "Real-time systems must meet strict timeliness
requirements. These systems also often need to protect
their critical program information (CPI) from
adversarial interference and intellectual property
theft. Trusted execution environments (TEE) execute CPI
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Nieke:2018:AAF,
author = "Michael Nieke and Jacopo Mauro and Christoph Seidl and
Thomas Th{\"u}m and Ingrid Chieh Yu and Felix Franzke",
title = "Anomaly analyses for feature-model evolution",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "188--201",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278123",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278123",
abstract = "Software Product Lines (SPLs) are a common technique
to capture families of software products in terms of
commonalities and variabilities. On a conceptual level,
functionality of an SPL is modeled in terms of features
in Feature Models (FMs). As other \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
@Article{Radanne:2018:RLG,
author = "Gabriel Radanne and Peter Thiemann",
title = "{Regenerate}: a language generator for extended
regular expressions",
journal = j-SIGPLAN,
volume = "53",
number = "9",
pages = "202--214",
month = nov,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3393934.3278133",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Apr 8 13:49:51 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3393934.3278133",
abstract = "Regular expressions are part of every programmer's
toolbox. They are used for a wide variety of
language-related tasks and there are many algorithms
for manipulating them. In particular, matching
algorithms that detect whether a word belongs to the
language described by a regular expression are well
explored, yet new algorithms appear frequently.
However, there is no satisfactory methodology for
testing such matchers. We propose a testing methodology
which is based on generating positive as well as
negative examples of words in the language. To this
end, we present a new algorithm to generate the
language described by a generalized regular expression
with intersection and complement operators. The
complement operator allows us to generate both positive
and negative example words from a given regular
expression. We implement our generator in Haskell and
OCaml and show that its performance is more than
adequate for testing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "https://dl.acm.org/loi/sigplan",
}
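%%% Editorial note on Radanne:2018:RLG: the abstract describes
%%% generating positive and negative example words for a
%%% generalized regular expression with intersection and
%%% complement. The Haskell sketch below is an illustration by
%%% this editor, not the authors' Regenerate implementation: it
%%% uses the standard Brzozowski-derivative technique for
%%% membership and a naive shortest-first enumeration of all
%%% words, which is far less efficient than the paper's
%%% generator. All names (Re, deriv, positives, ...) are
%%% hypothetical. It only illustrates how the complement
%%% operator makes negative examples fall out of the same
%%% machinery as positive ones. (BibTeX ignores text between
%%% entries, so this commented block does not affect parsing.)
%%%
%%% import Data.List (foldl')
%%%
%%% -- Generalized regular expressions over characters of type
%%% -- c, with intersection and complement operators.
%%% data Re c
%%%   = Empty              -- the empty language
%%%   | Eps                -- the language containing only ""
%%%   | Chr c              -- a single character
%%%   | Seq (Re c) (Re c)  -- concatenation
%%%   | Alt (Re c) (Re c)  -- union
%%%   | And (Re c) (Re c)  -- intersection
%%%   | Not (Re c)         -- complement
%%%   | Star (Re c)        -- Kleene star
%%%
%%% -- Does the language contain the empty word?
%%% nullable :: Re c -> Bool
%%% nullable Empty     = False
%%% nullable Eps       = True
%%% nullable (Chr _)   = False
%%% nullable (Seq r s) = nullable r && nullable s
%%% nullable (Alt r s) = nullable r || nullable s
%%% nullable (And r s) = nullable r && nullable s
%%% nullable (Not r)   = not (nullable r)
%%% nullable (Star _)  = True
%%%
%%% -- Brzozowski derivative: deriv a r accepts w iff r accepts
%%% -- the word a:w. Complement and intersection distribute.
%%% deriv :: Eq c => c -> Re c -> Re c
%%% deriv _ Empty     = Empty
%%% deriv _ Eps       = Empty
%%% deriv a (Chr b)   = if a == b then Eps else Empty
%%% deriv a (Seq r s)
%%%   | nullable r    = Alt (Seq (deriv a r) s) (deriv a s)
%%%   | otherwise     = Seq (deriv a r) s
%%% deriv a (Alt r s) = Alt (deriv a r) (deriv a s)
%%% deriv a (And r s) = And (deriv a r) (deriv a s)
%%% deriv a (Not r)   = Not (deriv a r)
%%% deriv a (Star r)  = Seq (deriv a r) (Star r)
%%%
%%% -- Membership test by iterated derivatives.
%%% matches :: Eq c => Re c -> [c] -> Bool
%%% matches r = nullable . foldl' (flip deriv) r
%%%
%%% -- All words over the alphabet, shortest first.
%%% allWords :: [c] -> [[c]]
%%% allWords sigma = concatMap wordsOfLen [0 ..]
%%%   where wordsOfLen 0 = [[]]
%%%         wordsOfLen n =
%%%           [ c : w | c <- sigma, w <- wordsOfLen (n - 1) ]
%%%
%%% -- Positive examples are words in the language; negative
%%% -- examples come for free from the complement operator.
%%% positives, negatives :: Eq c => [c] -> Re c -> [[c]]
%%% positives sigma r = filter (matches r) (allWords sigma)
%%% negatives sigma r = positives sigma (Not r)
%%%
%%% For example, take 4 (positives "ab" (Star (Chr 'a'))) yields
%%% ["","a","aa","aaa"], while take 4 (negatives "ab" (Star
%%% (Chr 'a'))) yields ["b","ab","ba","bb"]: a matcher under
%%% test should accept the first list and reject the second.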