%%% Preamble: four TeX fragments joined with the BibTeX "#" operator.
%%%   1. \input of bibnames.sty.
%%%   2. A \hyphenation list, which is currently empty.
%%%   3. A fallback definition of \circled (its argument in parentheses),
%%%      made only when \circled is not already defined.
%%%   4. A fallback definition of \reg as \circled{R}, likewise made only
%%%      when \reg is not already defined.
@Preamble{"\input bibnames.sty" #
"\hyphenation{
}" #
"\ifx \undefined \circled \def \circled #1{(#1)}\fi" #
"\ifx \undefined \reg \def \reg {\circled{R}}\fi"
}
%%% Contact details (name, postal address, telephone and fax numbers,
%%% e-mail addresses, home-page URL) referenced by the acknowledgement
%%% field of the entries in this file.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}
%%% Journal-name abbreviation used as the journal value of the entries below.
@String{j-TOS                   = "ACM Transactions on Storage"}
@Article{Rajan:2005:E,
  author =       "Sreeranga P. Rajan",
  title =        "Editorial",
  journal =      j-TOS,
  volume =       "1",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Apr 14 12:33:44 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yu:2005:CAR,
  author =       "Haifeng Yu and Amin Vahdat",
  title =        "Consistent and automatic replica regeneration",
  journal =      j-TOS,
  volume =       "1",
  number =       "1",
  pages =        "3--37",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Apr 14 12:33:44 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Huang:2005:CRK,
  author =       "Andrew C. Huang and Armando Fox",
  title =        "Cheap recovery: a key to self-managing state",
  journal =      j-TOS,
  volume =       "1",
  number =       "1",
  pages =        "38--70",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Apr 14 12:33:44 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Ellard:2005:DPE,
  author =       "Daniel Ellard and James Megquier",
  title =        "{DISP}: {Practical}, efficient, secure and
                 fault-tolerant distributed data storage",
  journal =      j-TOS,
  volume =       "1",
  number =       "1",
  pages =        "71--94",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Apr 14 12:33:44 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hughes:2005:RSR,
  author =       "Gordon F. Hughes and Joseph F. Murray",
  title =        "Reliability and security of {RAID} storage systems and
                 {D2D} archives using {SATA} disk drives",
  journal =      j-TOS,
  volume =       "1",
  number =       "1",
  pages =        "95--107",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Apr 14 12:33:44 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wu:2005:TRL,
  author =       "Changxun Wu and Randal Burns",
  title =        "Tunable randomization for load management in
                 shared-disk clusters",
  journal =      j-TOS,
  volume =       "1",
  number =       "1",
  pages =        "108--131",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Apr 14 12:33:44 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Sivathanu:2005:ISS,
  author =       "Muthian Sivathanu and Vijayan Prabhakaran and Andrea
                 C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau",
  title =        "Improving storage system availability with {D-GRAID}",
  journal =      j-TOS,
  volume =       "1",
  number =       "2",
  pages =        "133--170",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1063786.1063787",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Jul 7 13:56:40 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Jiang:2005:NFS,
  author =       "Anxiao (Andrew) Jiang and Jehoshua Bruck",
  title =        "Network file storage with graceful performance
                 degradation",
  journal =      j-TOS,
  volume =       "1",
  number =       "2",
  pages =        "171--189",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1063786.1063788",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Jul 7 13:56:40 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Peterson:2005:ETS,
  author =       "Zachary Peterson and Randal Burns",
  title =        "{Ext3cow}: a time-shifting file system for regulatory
                 compliance",
  journal =      j-TOS,
  volume =       "1",
  number =       "2",
  pages =        "190--212",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1063786.1063789",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Jul 7 13:56:40 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL =          "http://hssl.cs.jhu.edu/~zachary/papers/peterson-tos05.pdf",
  abstract =     "The ext3cow file system, built on the popular ext3
                 file system, provides an open-source file versioning
                 and snapshot platform for compliance with the
                 versioning and auditability requirements of recent
                 electronic record retention legislation. Ext3cow
                 provides a time-shifting interface that permits a
                 real-time and continuous view of data in the past.
                 Time-shifting does not pollute the file system
                 namespace nor require snapshots to be mounted as a
                 separate file system. Further, ext3cow is implemented
                 entirely in the file system space and, therefore, does
                 not modify kernel interfaces or change the operation of
                 other file systems. Ext3cow takes advantage of the
                 fine-grained control of on-disk and in-memory data
                 available only to a file system, resulting in minimal
                 degradation of performance and functionality.
                 Experimental results confirm this hypothesis; ext3cow
                 performs comparably to ext3 on many benchmarks and on
                 trace-driven experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2005:MBC,
  author =       "Zhenmin Li and Zhifeng Chen and Yuanyuan Zhou",
  title =        "Mining block correlations to improve storage
                 performance",
  journal =      j-TOS,
  volume =       "1",
  number =       "2",
  pages =        "213--245",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1063786.1063790",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Jul 7 13:56:40 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Garg:2005:BDD,
  author =       "Nitin Garg and Sumeet Sobti and Junwen Lai and
                 Fengzhou Zheng and Kai Li and Randolph Y. Wang and
                 Arvind Krishnamurthy",
  title =        "Bridging the digital divide: storage media $+$ postal
                 network $=$ generic high-bandwidth communication",
  journal =      j-TOS,
  volume =       "1",
  number =       "2",
  pages =        "246--275",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1063786.1063791",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Thu Jul 7 13:56:40 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "Making high-bandwidth Internet access pervasively
                 available to a large worldwide audience is a difficult
                 challenge, especially in many developing regions. As we
                 wait for the uncertain takeoff of technologies that
                 promise to improve the situation, we propose to explore
                 an approach that is potentially more easily realizable:
                 the use of digital storage media transported by the
                 postal system as a general digital communication
                 mechanism. We shall call such a system a Postmanet.
                 Compared to more conventional wide-area connectivity
                 options, the Postmanet has several important
                 advantages, including wide global reach, great
                 bandwidth potential, low cost, and ease of incremental
                 adoption. While the idea of sending digital content via
                 the postal system is not a new one, none of the
                 existing attempts have turned the postal system into a
                 generic and transparent communication channel that not
                 only can cater to a wide array of applications, but
                 also effectively manage the many idiosyncrasies
                 associated with using the postal system. In the
                 proposed Postmanet, we see two recurring themes at many
                 different levels of the system. One is the simultaneous
                 exploitation of the Internet and the postal system so
                 we can combine their latency and bandwidth advantages.
                 The other is the exploitation of the abundant capacity
                 and bandwidth of the Postmanet to improve its latency,
                 cost, and reliability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Ganesan:2005:MSS,
  author =       "Deepak Ganesan and Ben Greenstein and Deborah Estrin
                 and John Heidemann and Ramesh Govindan",
  title =        "Multiresolution storage and search in sensor
                 networks",
  journal =      j-TOS,
  volume =       "1",
  number =       "3",
  pages =        "277--315",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Sep 17 15:49:46 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Seo:2005:EDR,
  author =       "Beomjoo Seo and Roger Zimmermann",
  title =        "Efficient disk replacement and data migration
                 algorithms for large disk subsystems",
  journal =      j-TOS,
  volume =       "1",
  number =       "3",
  pages =        "316--345",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Sep 17 15:49:46 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2005:PDE,
  author =       "Xiaodong Li and Zhenmin Li and Yuanyuan Zhou and
                 Sarita Adve",
  title =        "Performance directed energy management for main memory
                 and disks",
  journal =      j-TOS,
  volume =       "1",
  number =       "3",
  pages =        "346--380",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Sep 17 15:49:46 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chang:2005:EML,
  author =       "Li-Pin Chang and Tei-Wei Kuo",
  title =        "Efficient management for large-scale flash-memory
                 storage systems with resource conservation",
  journal =      j-TOS,
  volume =       "1",
  number =       "4",
  pages =        "381--418",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Fri May 26 08:38:08 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Anastasiadis:2005:SFT,
  author =       "Stergios V. Anastasiadis and Kenneth C. Sevcik and
                 Michael Stumm",
  title =        "Scalable and fault-tolerant support for variable
                 bit-rate data in the {Exedra} streaming server",
  journal =      j-TOS,
  volume =       "1",
  number =       "4",
  pages =        "419--456",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Fri May 26 08:38:08 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Karlsson:2005:TPD,
  author =       "Magnus Karlsson and Christos Karamanolis and Xiaoyun
                 Zhu",
  title =        "{Triage}: Performance differentiation for storage
                 systems using adaptive control",
  journal =      j-TOS,
  volume =       "1",
  number =       "4",
  pages =        "457--480",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Fri May 26 08:38:08 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
%%% Author note: "S. J." is a name suffix belonging to Thomas J. E. Schwarz,
%%% not a separate author.  It is entered in the BibTeX "Last, Jr, First"
%%% name form so that the entry has five authors rather than six.
@Article{Hong:2006:UMBa,
  author =       "Bo Hong and Feng Wang and Scott A. Brandt and Darrell
                 D. E. Long and Schwarz, S. J., Thomas J. E.",
  title =        "Using {MEMS}-based storage in computer
                 systems---{MEMS} storage architectures",
  journal =      j-TOS,
  volume =       "2",
  number =       "1",
  pages =        "1--21",
  month =        feb,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Wed Aug 23 05:41:22 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hsieh:2006:EIH,
  author =       "Jen-Wei Hsieh and Tei-Wei Kuo and Li-Pin Chang",
  title =        "Efficient identification of hot data for flash memory
                 storage systems",
  journal =      j-TOS,
  volume =       "2",
  number =       "1",
  pages =        "22--40",
  month =        feb,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Wed Aug 23 05:41:22 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Gurumurthi:2006:TID,
  author =       "Sudhanva Gurumurthi and Anand Sivasubramaniam",
  title =        "Thermal issues in disk drive design: Challenges and
                 possible solutions",
  journal =      j-TOS,
  volume =       "2",
  number =       "1",
  pages =        "41--73",
  month =        feb,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Wed Aug 23 05:41:22 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wright:2006:VUS,
  author =       "Charles P. Wright and Jay Dave and Puja Gupta and
                 Harikesavan Krishnan and David P. Quigley and Erez
                 Zadok and Mohammad Nayyer Zubair",
  title =        "Versatility and {Unix} semantics in namespace
                 unification",
  journal =      j-TOS,
  volume =       "2",
  number =       "1",
  pages =        "74--105",
  month =        feb,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1138041.1138045",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Wed Aug 23 05:41:22 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "Administrators often prefer to keep related sets of
                 files in different locations or media, as it is easier
                 to maintain them separately. Users, however, prefer to
                 see all files in one location for convenience. One
                 solution that accommodates both needs is virtual
                 namespace unification---providing a merged view of
                 several directories without physically merging them.
                 For example, namespace unification can merge the
                 contents of several CD-ROM images without unpacking
                 them, merge binary directories from different packages,
                 merge views from several file servers, and more.
                 Namespace unification can also enable snapshotting by
                 marking some data sources read-only and then utilizing
                 copy-on-write for the read-only sources. For example,
                 an OS image may be contained on a read-only CD-ROM
                 image---and the user's configuration, data, and
                 programs could be stored in a separate read-write
                 directory. With copy-on-write unification, the user
                 need not be concerned about the two disparate file
                 systems. It is difficult to maintain Unix semantics
                 while offering a versatile namespace unification
                 system. Past efforts to provide such unification often
                 compromised on the set of features provided or Unix
                 compatibility---resulting in an incomplete solution
                 that users could not use. We designed and implemented a
                 versatile namespace unification system called Unionfs.
                 Unionfs maintains Unix semantics while offering
                 advanced namespace unification features: dynamic
                 insertion and removal of namespaces at any point in the
                 merged view, mixing read-only and read-write
                 components, efficient in-kernel duplicate elimination,
                 NFS interoperability, and more. Since releasing our
                 Linux implementation, it has been used by thousands of
                 users and over a dozen Linux distributions, which
                 helped us discover and solve many practical problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Mykletun:2006:AIO,
  author =       "Einar Mykletun and Maithili Narasimha and Gene
                 Tsudik",
  title =        "Authentication and integrity in outsourced databases",
  journal =      j-TOS,
  volume =       "2",
  number =       "2",
  pages =        "107--138",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Wed Aug 23 05:41:22 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hong:2006:UMBb,
  author =       "Bo Hong and Scott A. Brandt and Darrell D. E. Long and
                 Ethan L. Miller and Ying Lin",
  title =        "Using {MEMS}-based storage in computer
                 systems---device modeling and management",
  journal =      j-TOS,
  volume =       "2",
  number =       "2",
  pages =        "139--160",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Wed Aug 23 05:41:22 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zadok:2006:IFS,
  author =       "Erez Zadok and Rakesh Iyer and Nikolai Joukov and
                 Gopalan Sivathanu and Charles P. Wright",
  title =        "On incremental file system development",
  journal =      j-TOS,
  volume =       "2",
  number =       "2",
  pages =        "161--196",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Wed Aug 23 05:41:22 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Sugahara:2006:SMB,
  author =       "Satoshi Sugahara and Masaaki Tanaka",
  title =        "Spin {MOSFETs} as a basis for spintronics",
  journal =      j-TOS,
  volume =       "2",
  number =       "2",
  pages =        "197--219",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Wed Aug 23 05:41:22 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Vazhkudai:2006:CCD,
  author =       "Sudharshan S. Vazhkudai and Xiaosong Ma and Vincent W.
                 Freeh and Jonathan W. Strickland and Nandan Tammineedi
                 and Tyler Simon and Stephen L. Scott",
  title =        "Constructing collaborative desktop storage caches for
                 large scientific datasets",
  journal =      j-TOS,
  volume =       "2",
  number =       "3",
  pages =        "221--254",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Won:2006:ISC,
  author =       "Youjip Won and Hyungkyu Chang and Jaemin Ryu and
                 Yongdai Kim and Junseok Shim",
  title =        "Intelligent storage: Cross-layer optimization for soft
                 real-time workload",
  journal =      j-TOS,
  volume =       "2",
  number =       "3",
  pages =        "255--282",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhang:2006:SPV,
  author =       "Jianyong Zhang and Anand Sivasubramaniam and Qian Wang
                 and Alma Riska and Erik Riedel",
  title =        "Storage performance virtualization via throughput and
                 latency control",
  journal =      j-TOS,
  volume =       "2",
  number =       "3",
  pages =        "283--308",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wang:2006:CFS,
  author =       "An-I Andy Wang and Geoff Kuenning and Peter Reiher and
                 Gerald Popek",
  title =        "The {{\em Conquest\/}} file system: Better performance
                 through a disk\slash persistent-{RAM} hybrid design",
  journal =      j-TOS,
  volume =       "2",
  number =       "3",
  pages =        "309--348",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Memik:2006:MTE,
  author =       "Gokhan Memik and Mahmut T. Kandemir and Wei-Keng Liao
                 and Alok Choudhary",
  title =        "Multicollective {I/O}: a technique for exploiting
                 inter-file access patterns",
  journal =      j-TOS,
  volume =       "2",
  number =       "3",
  pages =        "349--369",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kang:2006:AVA,
  author =       "Sukwoo Kang and A. L. Narasimha Reddy",
  title =        "An approach to virtual allocation in storage systems",
  journal =      j-TOS,
  volume =       "2",
  number =       "4",
  pages =        "371--399",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Nijim:2006:MIS,
  author =       "Mais Nijim and Xiao Qin and Tao Xie",
  title =        "Modeling and improving security of a local disk system
                 for write-intensive workloads",
  journal =      j-TOS,
  volume =       "2",
  number =       "4",
  pages =        "400--423",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Bobbarjung:2006:IDE,
  author =       "Deepak R. Bobbarjung and Suresh Jagannathan and Cezary
                 Dubnicki",
  title =        "Improving duplicate elimination in storage systems",
  journal =      j-TOS,
  volume =       "2",
  number =       "4",
  pages =        "424--448",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wu:2006:DEI,
  author =       "Chin-Hsien Wu and Tei-Wei Kuo and Li-Pin Chang",
  title =        "The design of efficient initialization and crash
                 recovery for log-based file systems over flash memory",
  journal =      j-TOS,
  volume =       "2",
  number =       "4",
  pages =        "449--467",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lin:2006:EID,
  author =       "Song Lin and Demetrios Zeinalipour-Yazti and Vana
                 Kalogeraki and Dimitrios Gunopulos and Walid A.
                 Najjar",
  title =        "Efficient indexing data structures for flash-based
                 sensor devices",
  journal =      j-TOS,
  volume =       "2",
  number =       "4",
  pages =        "468--503",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
%%% Pages note: this paper is identified by article number (articleno 2),
%%% so pages follows the file's "N:1--N:??" form for article-numbered
%%% papers (first page known, final page still unknown) rather than "??--??".
@Article{Arnan:2007:DDR,
  author =       "Ron Arnan and Eitan Bachmat and Tao Kai Lam and Ruben
                 Michel",
  title =        "Dynamic data reallocation in disk arrays",
  journal =      j-TOS,
  volume =       "3",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "2",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
%%% Pages note: this paper is identified by article number (articleno 1),
%%% so pages follows the file's "N:1--N:??" form for article-numbered
%%% papers (first page known, final page still unknown) rather than "??--??".
@Article{Kim:2007:ZR,
  author =       "Seon Ho Kim and Hong Zhu and Roger Zimmermann",
  title =        "Zoned-{RAID}",
  journal =      j-TOS,
  volume =       "3",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1227835.1227836",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "The RAID (Redundant Array of Inexpensive Disks) system
                 has been widely used in practical storage applications
                 for better performance, cost effectiveness, and
                 reliability. This study proposes a novel variant of
                 RAID named Zoned-RAID (Z-RAID). Z-RAID improves the
                 performance of traditional RAID by utilizing the zoning
                 property of modern disks which provides multiple zones
                 with different data transfer rates within a disk.
                 Z-RAID levels 1, 5, and 6 are introduced to enhance the
                 effective data transfer rate of RAID levels 1, 5, and
                 6, respectively, by constraining the placement of data
                 blocks in multizone disks. We apply the Z-RAID to a
                 practical and popular application, streaming media
                 server, that requires a high-data transfer rate as well
                 as a high reliability. The analytical and experimental
                 results demonstrate the superiority of Z-RAID to
                 conventional RAID. Z-RAID provides a higher effective
                 data transfer rate in normal mode with no disadvantage.
                 In the presence of a disk failure, Z-RAID still
                 performs as well as RAID.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "1",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
%%% Pages note: this paper is identified by article number (articleno 3),
%%% so pages follows the file's "N:1--N:??" form for article-numbered
%%% papers (first page known, final page still unknown) rather than "??--??".
@Article{Zhang:2007:SEA,
  author =       "Guangyan Zhang and Jiwu Shu and Wei Xue and Weimin
                 Zheng",
  title =        "{SLAS}: An efficient approach to scaling round-robin
                 striped volumes",
  journal =      j-TOS,
  volume =       "3",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2007",
  CODEN =        "????",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Sat Apr 14 11:04:31 MDT 2007",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "3",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wright:2007:EAS,
  author =       "Charles P. Wright and Richard Spillane and Gopalan
                 Sivathanu and Erez Zadok",
  title =        "Extending {ACID} semantics to the file system",
  journal =      j-TOS,
  volume =       "3",
  number =       "2",
  pages =        "4:1--4:??",
  month =        jun,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1242520.1242521",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:16 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "An organization's data is often its most valuable
                 asset, but today's file systems provide few facilities
                 to ensure its safety. Databases, on the other hand,
                 have long provided transactions. Transactions are
                 useful because they provide atomicity, consistency,
                 isolation, and durability (ACID). Many applications
                 could make use of these semantics, but databases have a
                 wide variety of nonstandard interfaces. For example,
                 applications like mail servers currently perform
                 elaborate error handling to ensure atomicity and
                 consistency, because it is easier than using a DBMS. A
                 transaction-oriented programming model eliminates
                 complex error-handling code because failed operations
                 can simply be aborted without side effects. We have
                 designed a file system that exports ACID transactions
                 to user-level applications, while preserving the
                 ubiquitous and convenient POSIX interface. In our
                 prototype ACID file system, called Amino, updated
                 applications can protect arbitrary sequences of system
                 calls within a transaction. Unmodified applications
                 operate without any changes, but each system call is
                 transaction protected. We also built a recoverable
                 memory library with support for nested transactions to
                 allow applications to keep their in-memory data
                 structures consistent with the file system. Our
                 performance evaluation shows that ACID semantics can be
                 added to applications with acceptable overheads. When
                 Amino adds atomicity, consistency, and isolation
                 functionality to an application, it performs close to
                 Ext3. Amino achieves durability up to 46\% faster than
                 Ext3, thanks to improved locality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "4",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "databases; file system transactions; file systems;
                 ptrace monitors; recoverable memory",
}
@Article{Ding:2007:BCM,
  author =       "Xiaoning Ding and Song Jiang and Feng Chen",
  title =        "A buffer cache management scheme exploiting both
                 temporal and spatial localities",
  journal =      j-TOS,
  volume =       "3",
  number =       "2",
  pages =        "5:1--5:??",
  month =        jun,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1242520.1242522",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:16 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "On-disk sequentiality of requested blocks, or their
                 spatial locality, is critical to real disk performance
                 where the throughput of access to sequentially-placed
                 disk blocks can be an order of magnitude higher than
                 that of access to randomly-placed blocks.
                 Unfortunately, spatial locality of cached blocks is
                 largely ignored, and only temporal locality is
                 considered in current system buffer cache managements.
                 Thus, disk performance for workloads without dominant
                 sequential accesses can be seriously degraded. To
                 address this problem, we propose a scheme called DULO
                 (DUal LOcality) which exploits both temporal and
                 spatial localities in the buffer cache management.
                 Leveraging the filtering effect of the buffer cache,
                 DULO can influence the I/O request stream by making the
                 requests passed to the disk more sequential, thus
                 significantly increasing the effectiveness of I/O
                 scheduling and prefetching for disk performance
                 improvements.\par
                 We have implemented a prototype of DULO in Linux
                 2.6.11. The implementation shows that DULO can
                 significantly increase disk I/O throughput for
                 real-world applications such as a Web server, TPC
                 benchmark, file system benchmark, and scientific
                 programs. It reduces their execution times by as much
                 as 53\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "5",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "caching; file systems; hard disk; spatial locality;
                 temporal locality",
}
@Article{Rangaswami:2007:BMB,
  author =       "Raju Rangaswami and Zoran Dimitrijevi{\'c} and Edward
                 Chang and Klaus Schauser",
  title =        "Building {MEMS}-based storage systems for streaming
                 media",
  journal =      j-TOS,
  volume =       "3",
  number =       "2",
  pages =        "6:1--6:??",
  month =        jun,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1242520.1242523",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:16 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "The performance of streaming media servers has been
                 limited by the dual requirements of high disk
                 throughput (to service more clients simultaneously) and
                 low memory use (to decrease system cost). To achieve
                 high disk throughput, disk drives must be accessed with
                 large IOs to amortize disk access overhead. Large IOs
                 imply an increased requirement of expensive DRAM, and,
                 consequently, greater overall system cost. MEMS-based
                 storage, an emerging storage technology, is predicted
                 to offer a price-performance point between those of
                 DRAM and disk drives. In this study, we propose storage
                 architectures that use the relatively inexpensive
                 MEMS-based storage devices as an intermediate layer
                 (between DRAM and disk drives) for temporarily staging
                 large disk IOs at a significantly lower cost. We
                 present data layout mechanisms and synchronized IO
                 scheduling algorithms for the real-time storage and
                 retrieval of streaming data within such an augmented
                 storage system. Analytical evaluation suggests that
                 MEMS-augmented storage hierarchies can reduce the cost
                 and improve the throughput of streaming servers
                 significantly.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "6",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "I/O scheduling; MEMS-based storage; multidisk storage;
                 storage architecture; streaming media",
}
@Article{Arpaci-Dusseau:2007:ISI,
  author =       "Andrea Arpaci-Dusseau and Remzi Arpaci-Dusseau",
  title =        "Introduction to special issue {USENIX} {FAST} 2007",
  journal =      j-TOS,
  volume =       "3",
  number =       "3",
  pages =        "7:1--7:??",
  month =        oct,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1288783.1288784",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:25 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "7",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Schroeder:2007:UDF,
  author =       "Bianca Schroeder and Garth A. Gibson",
  title =        "Understanding disk failure rates: What does an {MTTF}
                 of 1,000,000 hours mean to you?",
  journal =      j-TOS,
  volume =       "3",
  number =       "3",
  pages =        "8:1--8:??",
  month =        oct,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1288783.1288785",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:25 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "Component failure in large-scale IT installations is
                 becoming an ever-larger problem as the number of
                 components in a single cluster approaches a
                 million.\par
                 This article is an extension of our previous study on
                 disk failures [Schroeder and Gibson 2007] and presents
                 and analyzes field-gathered disk replacement data from
                 a number of large production systems, including
                 high-performance computing sites and internet services
                 sites. More than 110,000 disks are covered by this
                 data, some for an entire lifetime of five years. The
                 data includes drives with SCSI and FC, as well as SATA
                 interfaces. The mean time-to-failure (MTTF) of those
                 drives, as specified in their datasheets, ranges from
                 1,000,000 to 1,500,000 hours, suggesting a nominal
                 annual failure rate of at most 0.88\%.\par
                 We find that in the field, annual disk replacement
                 rates typically exceed 1\%, with 2--4\% common and up
                 to 13\% observed on some systems. This suggests that
                 field replacement is a fairly different process than
                 one might predict based on datasheet MTTF.\par
                 We also find evidence, based on records of disk
                 replacements in the field, that failure rate is not
                 constant with age, and that rather than a significant
                 infant mortality effect, we see a significant early
                 onset of wear-out degradation. In other words, the
                 replacement rates in our data grew constantly with age,
                 an effect often assumed not to set in until after a
                 nominal lifetime of 5 years.\par
                 Interestingly, we observe little difference in
                 replacement rates between SCSI, FC, and SATA drives,
                 potentially an indication that disk-independent factors
                 such as operating conditions affect replacement rates
                 more than component-specific ones. On the other hand,
                 we see only one instance of a customer rejecting an
                 entire population of disks as a bad batch, in this case
                 because of media error rates, and this instance
                 involved SATA disks.\par
                 Time between replacement, a proxy for time between
                 failure, is not well modeled by an exponential
                 distribution and exhibits significant levels of
                 correlation, including autocorrelation and long-range
                 dependence.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "8",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "annual failure rates; annual replacement rates;
                 datasheet MTTF; failure correlation; hard drive
                 failure; hard drive replacements; infant mortality;
                 MTTF; storage reliability; time between failure;
                 wear-out",
}
@Article{Agrawal:2007:FYS,
  author =       "Nitin Agrawal and William J. Bolosky and John R.
                 Douceur and Jacob R. Lorch",
  title =        "A five-year study of file-system metadata",
  journal =      j-TOS,
  volume =       "3",
  number =       "3",
  pages =        "9:1--9:??",
  month =        oct,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1288783.1288788",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:25 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "For five years, we collected annual snapshots of
                 file-system metadata from over 60,000 Windows PC file
                 systems in a large corporation. In this article, we use
                 these snapshots to study temporal changes in file size,
                 file age, file-type frequency, directory size,
                 namespace structure, file-system population, storage
                 capacity and consumption, and degree of file
                 modification. We present a generative model that
                 explains the namespace structure and the distribution
                 of directory sizes. We find significant temporal trends
                 relating to the popularity of certain file types, the
                 origin of file content, the way the namespace is used,
                 and the degree of variation among file systems, as well
                 as more pedestrian changes in size and capacities. We
                 give examples of consequent lessons for designers of
                 file systems and related software.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "9",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "file systems; generative model; longitudinal study",
}
@Article{Gill:2007:OMS,
  author =       "Binny S. Gill and Luis Angel D. Bathen",
  title =        "Optimal multistream sequential prefetching in a shared
                 cache",
  journal =      j-TOS,
  volume =       "3",
  number =       "3",
  pages =        "10:1--10:??",
  month =        oct,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1288783.1288789",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:25 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "Prefetching is a widely used technique in modern data
                 storage systems. We study the most widely used class of
                 prefetching algorithms known as sequential prefetching.
                 There are two problems that plague the state-of-the-art
                 sequential prefetching algorithms: (i) cache pollution,
                 which occurs when prefetched data replaces more useful
                 prefetched or demand-paged data, and (ii) prefetch
                 wastage, which happens when prefetched data is evicted
                 from the cache before it can be used.\par
                 A sequential prefetching algorithm can have a fixed or
                 adaptive degree of prefetch and can be either
                 synchronous (when it can prefetch only on a miss) or
                 asynchronous (when it can also prefetch on a hit). To
                 capture these distinctions we define four classes of
                 prefetching algorithms: fixed synchronous (FS), fixed
                 asynchronous (FA), adaptive synchronous (AS), and
                 adaptive asynchronous (AsynchA). We find that the
                 relatively unexplored class of AsynchA algorithms is in
                 fact the most promising for sequential prefetching. We
                 provide a first formal analysis of the criteria
                 necessary for optimal throughput when using an AsynchA
                 algorithm in a cache shared by multiple steady
                 sequential streams. We then provide a simple
                 implementation called AMP (adaptive multistream
                 prefetching) which adapts accordingly, leading to
                 near-optimal performance for any kind of sequential
                 workload and cache size.\par
                 Our experimental setup consisted of an IBM xSeries 345
                 dual processor server running Linux using five SCSI
                 disks. We observe that AMP convincingly outperforms all
                 the contending members of the FA, FS, and AS classes
                 for any number of streams and over all cache sizes. As
                 anecdotal evidence, in an experiment with 100
                 concurrent sequential streams and varying cache sizes,
                 AMP surpasses the FA, FS, and AS algorithms by
                 29--172\%, 12--24\%, and 21--210\%, respectively, while
                 outperforming OBL by a factor of 8. Even for complex
                 workloads like SPC1-Read, AMP is consistently the
                 best-performing algorithm. For the SPC2 video-on-demand
                 workload, AMP can sustain at least 25\% more streams
                 than the next best algorithm. Furthermore, for a
                 workload consisting of short sequences, where
                 optimality is more elusive, AMP is able to outperform
                 all the other contenders in overall
                 performance.\par
                 Finally, we implemented AMP in the state-of-the-art
                 enterprise storage system, the IBM system storage
                 DS8000 series. We demonstrated that AMP dramatically
                 improves performance for common sequential and batch
                 processing workloads and delivers up to a twofold
                 increase in the sequential read capacity.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "10",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "adaptive prefetching; asynchronous prefetching; cache
                 pollution; degree of prefetch; fixed prefetching;
                 multistream read; optimal prefetching; prefetch
                 wastage; prestaging; sequential prefetching;
                 synchronous prefetching; trigger distance",
}
@Article{Yumerefendi:2007:SAN,
  author =       "Aydan R. Yumerefendi and Jeffrey S. Chase",
  title =        "Strong accountability for network storage",
  journal =      j-TOS,
  volume =       "3",
  number =       "3",
  pages =        "11:1--11:??",
  month =        oct,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1288783.1288786",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:25 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "This article presents the design, implementation, and
                 evaluation of CATS, a network storage service with
                 strong accountability properties. CATS offers a simple
                 web services interface that allows clients to read and
                 write opaque objects of variable size. This interface
                 is similar to the one offered by existing commercial
                 Internet storage services. CATS extends the
                 functionality of commercial Internet storage services
                 by offering support for strong accountability.\par
                 A CATS server annotates read and write responses with
                 evidence of correct execution, and offers audit and
                 challenge interfaces that enable clients to verify that
                 the server is faithful. A faulty server cannot conceal
                 its misbehavior, and evidence of misbehavior is
                 independently verifiable by any participant. CATS
                 clients are also accountable for their actions on the
                 service. A client cannot deny its actions, and the
                 server can prove the impact of those actions on the
                 state views it presented to other
                 clients.\par
                 Experiments with a CATS prototype evaluate the cost of
                 accountability under a range of conditions and expose
                 the primary factors influencing the level of assurance
                 and the performance of a strongly accountable storage
                 server. The results show that strong accountability is
                 practical for network storage systems in settings with
                 strong identity and modest degrees of write-sharing. We
                 discuss how the accountability concepts and techniques
                 used in CATS generalize to other classes of network
                 services.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "11",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "accountability; accountable services; accountable
                 storage",
}
@Article{Cipar:2007:CSU,
  author =       "James Cipar and Mark D. Corner and Emery D. Berger",
  title =        "Contributing storage using the transparent file
                 system",
  journal =      j-TOS,
  volume =       "3",
  number =       "3",
  pages =        "12:1--12:??",
  month =        oct,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1288783.1288787",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:25 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "Contributory applications allow users to donate unused
                 resources on their personal computers to a shared pool.
                 Applications such as SETI@home, Folding@home, and
                 Freenet are now in wide use and provide a variety of
                 services, including data processing and content
                 distribution. However, while several research projects
                 have proposed contributory applications that support
                 peer-to-peer storage systems, their adoption has been
                 comparatively limited. We believe that a key barrier to
                 the adoption of contributory storage systems is that
                 contributing a large quantity of local storage
                 interferes with the principal user of the
                 machine.\par
                 To overcome this barrier, we introduce the Transparent
                 File System (TFS). TFS provides background tasks with
                 large amounts of unreliable storage --- all of the
                 currently available space --- without impacting the
                 performance of ordinary file access operations. We show
                 that TFS allows a peer-to-peer contributory storage
                 system to provide 40\% more storage at twice the
                 performance when compared to a user-space storage
                 mechanism. We analyze the impact of TFS on replication
                 in peer-to-peer storage systems and show that TFS does
                 not appreciably increase the resources needed for file
                 replication.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "12",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "aging; contributory systems; fragmentation;
                 peer-to-peer",
}
@Article{Weddle:2007:PGS,
  author =       "Charles Weddle and Mathew Oldham and Jin Qian and An-I
                 Andy Wang and Peter Reiher and Geoff Kuenning",
  title =        "{PARAID}: a gear-shifting power-aware {RAID}",
  journal =      j-TOS,
  volume =       "3",
  number =       "3",
  pages =        "13:1--13:??",
  month =        oct,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1289720.1289721",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:25 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "Reducing power consumption for server-class computers
                 is important, since increased energy usage causes more
                 heat dissipation, greater cooling requirements, reduced
                 computational density, and higher operating costs. For
                 a typical data center, storage accounts for 27\% of
                 energy consumption. Conventional server-class RAIDs
                 cannot easily reduce power because loads are balanced
                 to use all disks, even for light loads.\par
                 We have built the power-aware RAID (PARAID), which
                 reduces energy use of commodity server-class disks
                 without specialized hardware. PARAID uses a skewed
                 striping pattern to adapt to the system load by varying
                 the number of powered disks. By spinning disks down
                 during light loads, PARAID can reduce power
                 consumption, while still meeting performance demands,
                 by matching the number of powered disks to the system
                 load. Reliability is achieved by limiting disk power
                 cycles and using different RAID encoding schemes. Based
                 on our five-disk prototype, PARAID uses up to 34\% less
                 power than conventional RAIDs while achieving similar
                 performance and reliability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "13",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "energy efficiency; power savings; RAID",
}
@Article{Maccormick:2008:NPR,
  author =       "John MacCormick and Chandramohan A. Thekkath and
                 Marcus Jager and Kristof Roomp and Lidong Zhou and Ryan
                 Peterson",
  title =        "Niobe: a practical replication protocol",
  journal =      j-TOS,
  volume =       "3",
  number =       "4",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1326542.1326543",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "The task of consistently and reliably replicating data
                 is fundamental in distributed systems, and numerous
                 existing protocols are able to achieve such replication
                 efficiently. When called on to build a large-scale
                 enterprise storage system with built-in replication, we
                 were therefore surprised to discover that no existing
                 protocols met our requirements. As a result, we
                 designed and deployed a new replication protocol called
                 Niobe. Niobe is in the primary-backup family of
                 protocols, and shares many similarities with other
                 protocols in this family. But we believe Niobe is
                 significantly more practical for large-scale enterprise
                 storage than previously published protocols. In
                 particular, Niobe is simple, flexible, has rigorously
                 proven yet simply stated consistency guarantees, and
                 exhibits excellent performance. Niobe has been deployed
                 as the backend for a commercial Internet service; its
                 consistency properties have been proved formally from
                 first principles, and further verified using the TLA+
                 specification language. We describe the protocol
                 itself, the system built to deploy it, and some of our
                 experiences in doing so.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "1",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "enterprise storage; replication",
}
@Article{Rodeh:2008:BTS,
  author =       "Ohad Rodeh",
  title =        "{B}-trees, shadowing, and clones",
  journal =      j-TOS,
  volume =       "3",
  number =       "4",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1326542.1326544",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "B-trees are used by many file systems to represent
                 files and directories. They provide guaranteed
                 logarithmic time key-search, insert, and remove. File
                 systems like WAFL and ZFS use shadowing, or
                 copy-on-write, to implement snapshots, crash recovery,
                 write-batching, and RAID. Serious difficulties arise
                 when trying to use B-trees and shadowing in a single
                 system.\par
                 This article is about a set of B-tree algorithms that
                 respects shadowing, achieves good concurrency, and
                 implements cloning (writable snapshots). Our cloning
                 algorithm is efficient and allows the creation of a
                 large number of clones.\par
                 We believe that using our B-trees would allow shadowing
                 file systems to better scale their on-disk data
                 structures.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "2",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "B-trees; concurrency; copy-on-write; shadowing;
                 snapshots",
}
@Article{Dutta:2008:WBG,
  author =       "Kaushik Dutta and Raju Rangaswami and Sajib Kundu",
  title =        "Workload-based generation of administrator hints for
                 optimizing database storage utilization",
  journal =      j-TOS,
  volume =       "3",
  number =       "4",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1326542.1326545",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "Database storage management at data centers is a
                 manual, time-consuming, and error-prone task. Such
                 management involves regular movement of database
                 objects across storage nodes in an attempt to balance
                 the I/O bandwidth utilization across disk drives.
                 Achieving such balance is critical for avoiding I/O
                 bottlenecks and thereby maximizing the utilization of
                 the storage system. However, manual management of the
                 aforesaid task, apart from increasing administrative
                 costs, encumbers the greater risks of untimely and
                 erroneous operations. We address the preceding concerns
                 with STORM, an automated approach that combines
                 low-overhead information gathering of database access
                 and storage usage patterns with efficient analysis to
                 generate accurate and timely hints for the
                 administrator regarding data movement operations.
                 STORM's primary objective is minimizing the volume of
                 data movement required (to minimize potential down-time
                 or reduction in performance) during the reconfiguration
                 operation, with the secondary constraints of space and
                 balanced I/O-bandwidth-utilization across the storage
                 devices. We analyze and evaluate STORM theoretically,
                 using a simulation framework, as well as
                 experimentally. We show that the dynamic data layout
                 reconfiguration problem is NP-hard and we present a
                 heuristic that provides an approximate solution in $
                 O(N \log (N / M) + (N / M)^2) $ time, where $M$ is the
                 number of storage devices and $N$ is the total number
                 of database objects residing in the storage devices. A
                 simulation study shows that the heuristic converges to
                 an acceptable solution that is successful in balancing
                 storage utilization with an accuracy that lies within
                 7\% of the ideal solution. Finally, an experimental
                 study demonstrates that the STORM approach can improve
                 the overall performance of the TPC-C benchmark by as
                 much as 22\%, by reconfiguring an initial random, but
                 evenly distributed, placement of database objects.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "3",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Verma:2008:UBU,
  author =       "Akshat Verma and Rohit Jain and Sugata Ghosal",
  title =        "A utility-based unified disk scheduling framework for
                 shared mixed-media services",
  journal =      j-TOS,
  volume =       "3",
  number =       "4",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1326542.1326546",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "We present a new disk scheduling framework to address
                 the needs of a shared multimedia service that provides
                 differentiated multilevel quality-of-service for
                 mixed-media workloads. In such a shared service,
                 requests from different users have different associated
                 performance objectives and utilities, in accordance
                 with the negotiated service-level agreements (SLAs).
                 Service providers typically provision resources only
                 for average workload intensity, so it becomes important
                 to handle workload surges in a way that maximizes the
                 utility of the served requests.\par
                 We capture the performance objectives and utilities
                 associated with these multiclass diverse workloads in a
                 unified framework and formulate the disk scheduling
                 problem as a reward maximization problem. We map the
                 reward maximization problem to a minimization problem
                 on graphs and, by novel use of graph-theoretic
                 techniques, design a scheduling algorithm that is
                 computationally efficient and optimal in the class of
                 seek-optimizing algorithms. Comprehensive experimental
                 studies demonstrate that the proposed algorithm
                 outperforms other disk schedulers under all loads, with
                 the performance improvement approaching 100\% under
                 certain high load conditions. In contrast to existing
                 schedulers, the proposed scheduler is extensible to new
                 performance objectives (workload type) and utilities by
                 simply altering the reward functions associated with
                 the requests.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "4",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "disk scheduling; GSP; profit maximization; shortest
                 path",
}
@Article{Hildrum:2008:SOL,
  author =       "Kirsten Hildrum and Fred Douglis and Joel L. Wolf and
                 Philip S. Yu and Lisa Fleischer and Akshay Katta",
  title =        "Storage optimization for large-scale distributed
                 stream-processing systems",
  journal =      j-TOS,
  volume =       "3",
  number =       "4",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1326542.1326547",
  ISSN =         "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L =       "1553-3077",
  bibdate =      "Mon Jun 16 17:36:37 MDT 2008",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tos/;
                 https://www.math.utah.edu/pub/tex/bib/tos.bib",
  abstract =     "We consider storage in an extremely large-scale
                 distributed computer system designed for stream
                 processing applications. In such systems, both incoming
                 data and intermediate results may need to be stored to
                 enable analyses at unknown future times. The quantity
                 of data of potential use would dominate even the
                 largest storage system. Thus, a mechanism is needed to
                 keep the data most likely to be used. One recently
                 introduced approach is to employ retention value
                 functions, which effectively assign each data object a
                 value that changes over time in a prespecified way
                 [Douglis et al.\ 2004]. Storage space for data entering
                 the system is reclaimed automatically by deleting data
                 of the lowest current value. In such large systems,
                 there will naturally be multiple file systems
                 available, each with different properties. Choosing the
                 right file system for a given incoming stream of data
                 presents a challenge. In this article we provide a
                 novel and effective scheme for optimizing the placement
                 of data within a distributed storage subsystem
                 employing retention value functions. The goal is to
                 keep the data of highest overall value, while
                 simultaneously balancing the read load to the file
                 system. The key aspects of such a scheme are quite
                 different from those that arise in traditional file
                 assignment problems. We further motivate this
                 optimization problem and describe a solution, comparing
                 its performance to other reasonable schemes via
                 simulation experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Storage",
  articleno =    "5",
  fjournal =     "ACM Transactions on Storage",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J960",
  keywords =     "file assignment problem; load balancing; optimization;
                 storage management; streaming systems; theory",
}
%%% TOS volume 4, number 1 (May 2008), article 1.
@article{Dholakia:2008:NID,
  author          = {Ajay Dholakia and Evangelos Eleftheriou and Xiao-Yu Hu
                     and Ilias Iliadis and Jai Menon and K. K. Rao},
  title           = {A new intra-disk redundancy scheme for
                     high-reliability {RAID} storage systems in the presence
                     of unrecoverable errors},
  journal         = j-TOS,
  volume          = {4},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:??},
  month           = may,
  year            = {2008},
  doi             = {https://doi.org/10.1145/1353452.1353453},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {file and I/O systems; RAID; reliability analysis;
                     stochastic modeling},
  abstract        = {Today's data storage systems are increasingly adopting
                     low-cost disk drives that have higher capacity but
                     lower reliability, leading to more frequent rebuilds
                     and to a higher risk of unrecoverable media errors. We
                     propose an efficient intradisk redundancy scheme to
                     enhance the reliability of RAID systems. This scheme
                     introduces an additional level of redundancy inside
                     each disk, on top of the RAID redundancy across
                     multiple disks. The RAID parity provides protection
                     against disk failures, whereas the proposed scheme aims
                     to protect against media-related unrecoverable errors.
                     In particular, we consider an intradisk redundancy
                     architecture that is based on an interleaved
                     parity-check coding scheme, which incurs only
                     negligible I/O performance degradation. A comparison
                     between this coding scheme and schemes based on
                     traditional Reed--Solomon codes and single-parity-check
                     codes is conducted by analytical means. A new model is
                     developed to capture the effect of correlated
                     unrecoverable sector errors. The probability of an
                     unrecoverable failure associated with these schemes is
                     derived for the new correlated model, as well as for
                     the simpler independent error model. We also derive
                     closed-form expressions for the mean time to data loss
                     of RAID-5 and RAID-6 systems in the presence of
                     unrecoverable errors and disk failures. We then combine
                     these results to characterize the reliability of RAID
                     systems that incorporate the intradisk redundancy
                     scheme. Our results show that in the practical case of
                     correlated errors, the interleaved parity-check scheme
                     provides the same reliability as the optimum, albeit
                     more complex, Reed--Solomon coding scheme. Finally, the
                     I/O and throughput performances are evaluated by means
                     of analysis and event-driven simulation.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Mon Jun 16 17:36:45 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 1 (May 2008), article 2.
@article{Essary:2008:PDG,
  author          = {David Essary and Ahmed Amer},
  title           = {Predictive data grouping: Defining the bounds of
                     energy and latency reduction through predictive data
                     grouping and replication},
  journal         = j-TOS,
  volume          = {4},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:??},
  month           = may,
  year            = {2008},
  doi             = {https://doi.org/10.1145/1353452.1353454},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {data grouping; latency; layout optimization; power;
                     power management; replication},
  abstract        = {We demonstrate that predictive grouping is an
                     effective mechanism for reducing disk arm movement,
                     thereby simultaneously reducing energy consumption and
                     data access latency. We further demonstrate that
                     predictive grouping has untapped dramatic potential to
                     further improve access performance and limit energy
                     consumption. Data retrieval latencies are considered a
                     major bottleneck, and with growing volumes of data and
                     increased storage needs it is only growing in
                     significance. Data storage infrastructure is therefore
                     a growing consumer of energy at data-center scales,
                     while the individual disk is already a significant
                     concern for mobile computing (accounting for almost a
                     third of a mobile system's energy demands). While
                     improving responsiveness of storage subsystems and
                     hence reducing latencies in data retrieval is often
                     considered contradictory with efforts to reduce disk
                     energy consumption, we demonstrate that predictive data
                     grouping has the potential to simultaneously work
                     towards both these goals. Predictive data grouping has
                     advantages in its applicability compared to both prior
                     approaches to reducing latencies and to reducing energy
                     usage. For latencies, grouping can be performed
                     opportunistically, thereby avoiding the serious
                     performance penalties that can be incurred with prior
                     applications of access prediction (such as predictive
                     prefetching of data). For energy, we show how
                     predictive grouping can even save energy use for an
                     individual disk that is never idle.\par
                     Predictive data grouping with effective replication
                     results in a reduction of the overall mechanical
                     movement required to retrieve data. We have built upon
                     our detailed measurements of disk power consumption,
                     and have estimated both the energy expended by a hard
                     disk for its mechanical components, and that needed to
                     move the disk arm. We have further compared, via
                     simulation, three models of predictive grouping of
                     on-disk data, including an optimal arrangement of data
                     that is guaranteed to minimize disk arm movement. These
                     experiments have allowed us to measure the limits of
                     performance improvement achievable with optimal data
                     grouping and replication strategies on a single device,
                     and have further allowed us to demonstrate the
                     potential of such schemes to reduce energy consumption
                     of mechanical components by up to 70\%.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Mon Jun 16 17:36:45 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 1 (May 2008), article 3.
@article{Tran:2008:NAD,
  author          = {Dinh Nguyen Tran and Phung Chinh Huynh and Y. C. Tay
                     and Anthony K. H. Tung},
  title           = {A new approach to dynamic self-tuning of database
                     buffers},
  journal         = j-TOS,
  volume          = {4},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:??},
  month           = may,
  year            = {2008},
  doi             = {https://doi.org/10.1145/1353452.1353455},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {autonomic computing; buffer allocation; miss
                     probability},
  abstract        = {Current businesses rely heavily on efficient access to
                     their databases. Manual tuning of these database
                     systems by performance experts is increasingly
                     infeasible: For small companies, hiring an expert may
                     be too expensive; for large enterprises, even an expert
                     may not fully understand the interaction between a
                     large system and its multiple changing workloads. This
                     trend has led major vendors to offer tools that
                     automatically and dynamically tune a database
                     system.\par
                     Many database tuning knobs concern the buffer pool for
                     caching data and disk pages. Specifically, these knobs
                     control the buffer allocation and thus the cache miss
                     probability, which has direct impact on
                     performance.\par
                     Previous methods for automatic buffer tuning are based
                     on simulation, black-box control, gradient descent, and
                     empirical equations. This article presents a new
                     approach, using calculations with an
                     analytically-derived equation that relates miss
                     probability to buffer allocation; this equation fits
                     four buffer replacement policies, as well as twelve
                     datasets from mainframes running commercial databases
                     in large corporations.\par
                     The equation identifies a buffer-size limit that is
                     useful for buffer tuning and powering down idle
                     buffers. It can also replace simulation in predicting
                     I/O costs. Experiments with PostgreSQL illustrate how
                     the equation can help optimize online buffer
                     partitioning, ensure fairness in buffer reclamation,
                     and dynamically retune the allocation when workloads
                     change. It is also used, in conjunction with DB2's
                     interface for retrieving miss data, for tuning DB2
                     buffer allocation to achieve targets for differentiated
                     service.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Mon Jun 16 17:36:45 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 2 (May 2008), article 4.
%%% The product name "Turbo Memory" is braced in the title so that
%%% bibliography styles which downcase titles keep its capitals.
@Article{Matthews:2008:ITM,
author = "Jeanna Matthews and Sanjeev Trika and Debra Hensgen
and Rick Coulson and Knut Grimsrud",
title = "Intel{\reg} {Turbo Memory}: Nonvolatile disk caches in
the storage hierarchy of mainstream computer systems",
journal = j-TOS,
volume = "4",
number = "2",
pages = "4:1--4:??",
month = may,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1367829.1367830",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Jun 16 17:36:51 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Hard-disk drives are a significant bottleneck to
system performance and are also responsible for a
significant fraction of total system power consumption.
Intel Turbo Memory addresses these problems by adding a
new layer to the storage hierarchy: a platform-based
and nonvolatile, disk cache. In this article, we
describe the hardware and software elements of the
Intel Turbo Memory architecture. We show how it
supports the new ReadyBoost and ReadyDrive features in
Microsoft Vista and describe its key caching
algorithms. We present performance, power savings, and
wear-leveling results achieved by Intel Turbo Memory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "disk cache; NAND; nonvolatile memory; solid-state
disk; write-back",
}
%%% TOS volume 4, number 2 (May 2008), article 5.
@article{Traeger:2008:NYS,
  author          = {Avishay Traeger and Erez Zadok and Nikolai Joukov and
                     Charles P. Wright},
  title           = {A nine year study of file system and storage
                     benchmarking},
  journal         = j-TOS,
  volume          = {4},
  number          = {2},
  articleno       = {5},
  pages           = {5:1--5:??},
  month           = may,
  year            = {2008},
  doi             = {https://doi.org/10.1145/1367829.1367831},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {benchmarks; file systems; storage systems},
  abstract        = {Benchmarking is critical when evaluating performance,
                     but is especially difficult for file and storage
                     systems. Complex interactions between I/O devices,
                     caches, kernel daemons, and other OS components result
                     in behavior that is rather difficult to analyze.
                     Moreover, systems have different features and
                     optimizations, so no single benchmark is always
                     suitable. The large variety of workloads that these
                     systems experience in the real world also adds to this
                     difficulty.\par
                     In this article we survey 415 file system and storage
                     benchmarks from 106 recent papers. We found that most
                     popular benchmarks are flawed and many research papers
                     do not provide a clear indication of true performance.
                     We provide guidelines that we hope will improve future
                     performance evaluations. To show how some widely used
                     benchmarks can conceal or overemphasize overheads, we
                     conducted a set of experiments. As a specific example,
                     slowing down read operations on ext2 by a factor of 32
                     resulted in only a 2--5\% wall-clock slowdown in a
                     popular compile benchmark. Finally, we discuss future
                     work to improve file system and storage benchmarking.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Mon Jun 16 17:36:51 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 3 (November 2008), article 6.
@article{Baker:2008:ISI,
  author          = {Mary Baker},
  title           = {Introduction to special issue of {USENIX FAST 2008}},
  journal         = j-TOS,
  volume          = {4},
  number          = {3},
  articleno       = {6},
  pages           = {6:1--6:??},
  month           = nov,
  year            = {2008},
  doi             = {https://doi.org/10.1145/1416944.1416945},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:07 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 3 (November 2008), article 7.
@article{Jiang:2008:DDC,
  author          = {Weihang Jiang and Chongfeng Hu and Yuanyuan Zhou and
                     Arkady Kanevsky},
  title           = {Are disks the dominant contributor for storage
                     failures?: a comprehensive study of storage subsystem
                     failure characteristics},
  journal         = j-TOS,
  volume          = {4},
  number          = {3},
  articleno       = {7},
  pages           = {7:1--7:??},
  month           = nov,
  year            = {2008},
  doi             = {https://doi.org/10.1145/1416944.1416946},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {disk failures; failure characteristics; storage
                     subsystem; Storage system},
  abstract        = {Building reliable storage systems becomes increasingly
                     challenging as the complexity of modern storage systems
                     continues to grow. Understanding storage failure
                     characteristics is crucially important for designing
                     and building a reliable storage system. While several
                     recent studies have been conducted on understanding
                     storage failures, almost all of them focus on the
                     failure characteristics of one component --- disks ---
                     and do not study other storage component
                     failures.\par
                     This article analyzes the failure characteristics of
                     storage subsystems. More specifically, we analyzed the
                     storage logs collected from about 39,000 storage
                     systems commercially deployed at various customer
                     sites. The dataset covers a period of 44 months and
                     includes about 1,800,000 disks hosted in about 155,000
                     storage-shelf enclosures. Our study reveals many
                     interesting findings, providing useful guidelines for
                     designing reliable storage systems. Some of our major
                     findings include: (1) In addition to disk failures that
                     contribute to 20--55\% of storage subsystem failures,
                     other components such as physical interconnects and
                     protocol stacks also account for a significant
                     percentage of storage subsystem failures. (2) Each
                     individual storage subsystem failure type, and storage
                     subsystem failure as a whole, exhibits strong
                     self-correlations. In addition, these failures exhibit
                     ``bursty'' patterns. (3) Storage subsystems configured
                     with redundant interconnects experience 30--40\% lower
                     failure rates than those with a single interconnect.
                     (4) Spanning disks of a RAID group across multiple
                     shelves provides a more resilient solution for storage
                     subsystems than within a single shelf.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:07 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 3 (November 2008), article 8.
@article{Bairavasundaram:2008:ADC,
  author          = {Lakshmi N. Bairavasundaram and Andrea C.
                     Arpaci-Dusseau and Remzi H. Arpaci-Dusseau and Garth R.
                     Goodson and Bianca Schroeder},
  title           = {An analysis of data corruption in the storage stack},
  journal         = j-TOS,
  volume          = {4},
  number          = {3},
  articleno       = {8},
  pages           = {8:1--8:??},
  month           = nov,
  year            = {2008},
  doi             = {https://doi.org/10.1145/1416944.1416947},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {Data corruption; disk drive reliability},
  abstract        = {An important threat to reliable storage of data is
                     silent data corruption. In order to develop suitable
                     protection mechanisms against data corruption, it is
                     essential to understand its characteristics. In this
                     article, we present the first large-scale study of data
                     corruption. We analyze corruption instances recorded in
                     production storage systems containing a total of 1.53
                     million disk drives, over a period of 41 months. We
                     study three classes of corruption: checksum mismatches,
                     identity discrepancies, and parity inconsistencies. We
                     focus on checksum mismatches since they occur the
                     most.\par
                     We find more than 400,000 instances of checksum
                     mismatches over the 41-month period. We find many
                     interesting trends among these instances, including:
                     (i) nearline disks (and their adapters) develop
                     checksum mismatches an order of magnitude more often
                     than enterprise-class disk drives, (ii) checksum
                     mismatches within the same disk are not independent
                     events and they show high spatial and temporal
                     locality, and (iii) checksum mismatches across
                     different disks in the same storage system are not
                     independent. We use our observations to derive lessons
                     for corruption-proof system design.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:07 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 3 (November 2008), article 9.
%%% The abstract writes "et al.\ " with a control space so that TeX does
%%% not treat the abbreviation's period as the end of a sentence.
@Article{Tsafrir:2008:PSF,
author = "Dan Tsafrir and Tomer Hertz and David Wagner and Dilma
{Da Silva}",
title = "Portably solving file races with hardness
amplification",
journal = j-TOS,
volume = "4",
number = "3",
pages = "9:1--9:??",
month = nov,
year = "2008",
CODEN = "????",
DOI = "https://doi.org/10.1145/1416944.1416948",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:07 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The file-system API of contemporary systems makes
programs vulnerable to TOCTTOU
(time-of-check-to-time-of-use) race conditions.
Existing solutions either help users to detect these
problems (by pinpointing their locations in the code),
or prevent the problem altogether (by modifying the
kernel or its API). But the latter alternative is not
prevalent, and the former is just the first step:
Programmers must still address TOCTTOU flaws within the
limits of the existing API with which several important
tasks cannot be accomplished in a portable
straightforward manner. Recently, Dean and Hu [2004]
addressed this problem and suggested a probabilistic
hardness amplification approach that alleviated the
matter. Alas, shortly after, Borisov et al.\ [2005]
responded with an attack termed ``filesystem maze''
that defeated the new approach.\par
We begin by noting that mazes constitute a generic way
to deterministically win many TOCTTOU races (gone are
the days when the probability was small). In the face
of this threat, we: (1) develop a new user-level
defense that can withstand mazes; and (2) show that our
method is undefeated even by much stronger hypothetical
attacks that provide the adversary program with ideal
conditions to win the race (enjoying complete and
instantaneous knowledge about the defending program's
actions and being able to perfectly synchronize
accordingly). The fact that our approach is immune to
these unrealistic attacks suggests it can be used as a
simple and portable solution to a large class of
TOCTTOU vulnerabilities, without requiring
modifications to the underlying operating system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Race conditions; time-of-check-to-time-of-use;
TOCTTOU",
}
%%% TOS volume 4, number 3 (November 2008), article 10.
@article{Narayanan:2008:WLP,
  author          = {Dushyanth Narayanan and Austin Donnelly and Antony
                     Rowstron},
  title           = {Write off-loading: Practical power management for
                     enterprise storage},
  journal         = j-TOS,
  volume          = {4},
  number          = {3},
  articleno       = {10},
  pages           = {10:1--10:??},
  month           = nov,
  year            = {2008},
  doi             = {https://doi.org/10.1145/1416944.1416949},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {disk spin-down; DiskEnergy; energy; enterprise
                     storage; power; write off-loading},
  abstract        = {In enterprise data centers power usage is a problem
                     impacting server density and the total cost of
                     ownership. Storage uses a significant fraction of the
                     power budget and there are no widely deployed
                     power-saving solutions for enterprise storage systems.
                     The traditional view is that enterprise workloads make
                     spinning disks down ineffective because idle periods
                     are too short. We analyzed block-level traces from 36
                     volumes in an enterprise data center for one week and
                     concluded that significant idle periods exist, and that
                     they can be further increased by modifying the
                     read/write patterns using {\em write off-loading}.
                     Write off-loading allows write requests on spun-down
                     disks to be temporarily redirected to persistent
                     storage elsewhere in the data center.\par
                     The key challenge is doing this transparently and
                     efficiently at the block level, without sacrificing
                     consistency or failure resilience. We describe our
                     write off-loading design and implementation that
                     achieves these goals. We evaluate it by replaying
                     portions of our traces on a rack-based testbed. Results
                     show that just spinning disks down when idle saves
                     28--36\% of energy, and write off-loading further
                     increases the savings to 45--60\%.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:07 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 4 (January 2009), article 11.
@article{MacCormick:2009:KNA,
  author          = {John MacCormick and Nicholas Murphy and Venugopalan
                     Ramasubramanian and Udi Wieder and Junfeng Yang and
                     Lidong Zhou},
  title           = {Kinesis: a new approach to replica placement in
                     distributed storage systems},
  journal         = j-TOS,
  volume          = {4},
  number          = {4},
  articleno       = {11},
  pages           = {11:1--11:??},
  month           = jan,
  year            = {2009},
  doi             = {https://doi.org/10.1145/1480439.1480440},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {load balancing; multiple-choice paradigm; Storage
                     system},
  abstract        = {Kinesis is a novel data placement model for
                     distributed storage systems. It exemplifies three
                     design principles: {\em structure\/} (division of
                     servers into a few failure-isolated segments), {\em
                     freedom of choice\/} (freedom to allocate the best
                     servers to store and retrieve data based on current
                     resource availability), and {\em scattered
                     distribution\/} (independent, pseudo-random spread of
                     replicas in the system). These design principles enable
                     storage systems to achieve balanced utilization of
                     storage and network resources in the presence of
                     incremental system expansions, failures of single and
                     shared components, and skewed distributions of data
                     size and popularity. In turn, this ability leads to
                     significantly reduced resource provisioning costs, good
                     user-perceived response times, and fast, parallelized
                     recovery from independent and correlated
                     failures.\par
                     This article validates Kinesis through theoretical
                     analysis, simulations, and experiments on a prototype
                     implementation. Evaluations driven by real-world traces
                     show that Kinesis can significantly outperform the
                     widely used Chain replica-placement strategy in terms
                     of resource requirements, end-to-end delay, and failure
                     recovery.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:20 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 4 (January 2009), article 12.
@article{Huang:2009:QSS,
  author          = {Chih-Yuan Huang and Tei-Wei Kuo and Ai-Chun Pang},
  title           = {{QoS} for storage subsystems using {IEEE-1394}},
  journal         = j-TOS,
  volume          = {4},
  number          = {4},
  articleno       = {12},
  pages           = {12:1--12:??},
  month           = jan,
  year            = {2009},
  doi             = {https://doi.org/10.1145/1480439.1480441},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {I/O subsystem; IEEE-1394; quality-of-service; real
                     time},
  abstract        = {IEEE-1394 is widely adopted in various commercial
                     products for computing, communication, and
                     entertainment. Although many services with
                     Quality-of-Service (QoS) supports are now available in
                     systems over IEEE-1394, little work is done for
                     QoS-based resource allocation. In this article, we aim
                     at the design of a bandwidth reservation mechanism and
                     its policy for isochronous requests, such as those from
                     cameras. We then address the QoS support issue for
                     asynchronous requests, such as those from disks, and an
                     analytic framework for probability-based QoS
                     guarantees. This work is concluded by the proposing of
                     a topology configuration algorithm for IEEE-1394
                     devices. The capability of the proposed methodology and
                     the analytic framework are evaluated by a series of
                     experiments over a Linux-based system prototype.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:20 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 4 (January 2009), article 13.
@article{Anastasiadis:2009:RFA,
  author          = {Stergios V. Anastasiadis and Rajiv G. Wickremesinghe
                     and Jeffrey S. Chase},
  title           = {Rethinking {FTP}: Aggressive block reordering for
                     large file transfers},
  journal         = j-TOS,
  volume          = {4},
  number          = {4},
  articleno       = {13},
  pages           = {13:1--13:??},
  month           = jan,
  year            = {2009},
  doi             = {https://doi.org/10.1145/1480439.1480442},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {Disk access; file transfer protocols; scheduling},
  abstract        = {Whole-file transfer is a basic primitive for Internet
                     content dissemination. Content servers are increasingly
                     limited by disk arm movement, given the rapid growth in
                     disk density, disk transfer rates, server network
                     bandwidth, and content size. Individual file transfers
                     are sequential, but the block access sequence on a
                     content server is effectively random when many slow
                     clients access large files concurrently. Although
                     larger blocks can help improve disk throughput,
                     buffering requirements increase linearly with block
                     size.\par
                     This article explores a novel block reordering
                     technique that can reduce server disk traffic
                     significantly when large content files are shared. The
                     idea is to transfer blocks to each client in any order
                     that is convenient for the server. The server sends
                     blocks to each client opportunistically in order to
                     maximize the advantage from the disk reads it issues to
                     serve other clients accessing the same file. We first
                     illustrate the motivation and potential impact of
                     aggressive block reordering using simple analytical
                     models. Then we describe a file transfer system using a
                     simple block reordering algorithm, called Circus.
                     Experimental results with the Circus prototype show
                     that it can improve server throughput by a factor of
                     two or more in workloads with strong file access
                     locality.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:20 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 4 (January 2009), article 14.
@article{Choi:2009:JFT,
  author          = {Hyun Jin Choi and Seung-Ho Lim and Kyu Ho Park},
  title           = {{JFTL}: a flash translation layer based on a journal
                     remapping for flash memory},
  journal         = j-TOS,
  volume          = {4},
  number          = {4},
  articleno       = {14},
  pages           = {14:1--14:??},
  month           = jan,
  year            = {2009},
  doi             = {https://doi.org/10.1145/1480439.1480443},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {Flash memory; flash translation layer; garbage
                     detection; journal remapping; journaling file system},
  abstract        = {In flash memory-based storage, a Flash Translation
                     Layer (FTL) manages the mapping between the logical
                     addresses of a file system and the physical addresses
                     of the flash memory. When a journaling file system is
                     set up on the FTL, the consistency of the file system
                     is guaranteed by duplications of the same file system
                     changes in both the journal region of the file system
                     and the home locations of the changes. However, these
                     duplications inevitably degrade the performance of the
                     file system. In this article we present an efficient
                     FTL, called {\em JFTL}, based on a journal remapping
                     technique. The FTL uses an address mapping method to
                     write all the data to a new region in a process known
                     as an out-of-place update. Because of this process, the
                     existing data in flash memory is not overwritten by
                     such an update. By using this characteristic of the
                     FTL, the JFTL remaps addresses of the logged file
                     system changes to addresses of the home locations of
                     the changes, instead of writing the changes once more
                     to flash memory. Thus, the JFTL efficiently eliminates
                     redundant data in the flash memory as well as
                     preserving the consistency of the journaling file
                     system. Our experiments confirm that, when associated
                     with a writeback or ordered mode of a conventional EXT3
                     file system, the JFTL enhances the performance of EXT3
                     by up to 20\%. Furthermore, when the JFTL operates with
                     a journaled mode of EXT3, there is almost a twofold
                     performance gain in many cases. Moreover, the recovery
                     performance of the JFTL is much better than that of the
                     FTL.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:20 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
%%% TOS volume 4, number 4 (January 2009), article 15.
@article{Li:2009:GCS,
  author          = {Mingqiang Li and Jiwu Shu and Weimin Zheng},
  title           = {{GRID} codes: Strip-based erasure codes with high
                     fault tolerance for storage systems},
  journal         = j-TOS,
  volume          = {4},
  number          = {4},
  articleno       = {15},
  pages           = {15:1--15:??},
  month           = jan,
  year            = {2009},
  doi             = {https://doi.org/10.1145/1480439.1480444},
  issn            = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l          = {1553-3077},
  coden           = {????},
  keywords        = {Disk failure; erasure code; fault tolerance; storage
                     system; unrecoverable sector error},
  abstract        = {As storage systems grow in size and complexity, they
                     are increasingly confronted with concurrent disk
                     failures together with multiple unrecoverable sector
                     errors. To ensure high data reliability and
                     availability, erasure codes with high fault tolerance
                     are required. In this article, we present a new family
                     of erasure codes with high fault tolerance, named GRID
                     codes. They are called such because they are a family
                     of {\em strip-based codes\/} whose strips are arranged
                     into multi-dimensional grids. In the construction of
                     GRID codes, we first introduce a concept of {\em
                     matched codes\/} and then discuss how to use matched
                     codes to construct GRID codes. In addition, we propose
                     an iterative reconstruction algorithm for GRID codes.
                     We also discuss some important features of GRID codes.
                     Finally, we compare GRID codes with several categories
                     of existing codes. Our comparisons show that for
                     large-scale storage systems, our GRID codes have
                     attractive advantages over many existing erasure codes:
                     (a) They are completely XOR-based and have very regular
                     structures, ensuring easy implementation; (b) they can
                     provide up to 15 and even higher fault tolerance; and
                     (c) their storage efficiency can reach up to 80\% and
                     even higher. All the advantages make GRID codes more
                     suitable for large-scale storage systems.},
  ajournal        = {ACM Trans. Storage},
  fjournal        = {ACM Transactions on Storage},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate         = {Tue Mar 16 15:33:20 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tos/;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@Article{Bahn:2009:PPS,
author = "Hyokyung Bahn and Soyoon Lee and Sam H. Noh",
title = "{P\slash PA-SPTF}: Parallelism-aware request
scheduling algorithms for {MEMS}-based storage
devices",
journal = j-TOS,
volume = "5",
number = "1",
pages = "1:1--1:??",
month = mar,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1502777.1502778",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:38 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "MEMS-based storage is foreseen as a promising storage
media that provides high-bandwidth, low-power
consumption, high-density, and low cost. Due to these
versatile features, MEMS storage is anticipated to be
used for a wide range of applications from storage for
small handheld devices to high capacity mass storage
servers. However, MEMS storage has vastly different
physical characteristics compared to a traditional
disk. First, MEMS storage has thousands of heads that
can be activated simultaneously. Second, the media of
MEMS storage is a square structure which is different
from the platter structure of disks. This article
presents a new request scheduling algorithm for MEMS
storage called P-SPTF that makes use of the
aforementioned characteristics. P-SPTF considers the
parallelism of MEMS storage as well as the seek time of
requests on the two dimensional square structure. We
then present another algorithm called PA-SPTF that
considers the aging factor so that starvation
resistance is improved. Simulation studies show that
PA-SPTF improves the performance of MEMS storage by up
to 39.2\% in terms of the average response time and
62.4\% in terms of starvation resistance compared to
the widely acknowledged SPTF algorithm. We also show
that there exists a spectrum of scheduling algorithms
that subsumes both the P-SPTF and PA-SPTF algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "MEMS-based storage; parallelism; scheduling; seek
time; starvation",
}
@Article{Ma:2009:NAS,
author = "Di Ma and Gene Tsudik",
title = "A new approach to secure logging",
journal = j-TOS,
volume = "5",
number = "1",
pages = "2:1--2:??",
month = mar,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1502777.1502779",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:38 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The need for secure logging is well-understood by the
security professionals, including both researchers and
practitioners. The ability to efficiently verify all
(or some) log entries is important to any application
employing secure logging techniques. In this article,
we begin by examining the state of the art in secure
logging and identify some problems inherent to systems
based on trusted third-party servers. We then propose a
different approach to secure logging based upon
recently developed Forward-Secure Sequential Aggregate
(FssAgg) authentication techniques. Our approach offers
both space-efficiency and provable security. We
illustrate two concrete schemes --- one
private-verifiable and one public-verifiable --- that
offer practical secure logging without any reliance on
online trusted third parties or secure hardware. We
also investigate the concept of immutability in the
context of forward-secure sequential aggregate
authentication to provide finer grained verification.
Finally we evaluate proposed schemes and report on our
experience with implementing them within a secure
logging system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "forward secure sequential aggregate (FssAgg)
authentication; forward-secure stream integrity; MACs;
Secure logging; signatures; truncation attack",
}
@Article{Garrison:2009:UFS,
author = "John A. Garrison and A. L. Narasimha Reddy",
title = "{Umbrella File System}: Storage management across
heterogeneous devices",
journal = j-TOS,
volume = "5",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1502777.1502780",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:38 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "With the advent of and recent developments in Flash
storage, device characteristic diversity is becoming
both more prevalent and more distinct. In this article,
we describe the Umbrella File System (UmbrellaFS), a
stackable file system designed to provide flexibility
in matching diversity of file access characteristics to
diversity of device characteristics through a user or
system administrator specified policy. We present the
design and results from a prototype implementation of
UmbrellaFS on both Linux 2.4 and 2.6. The results show
that UmbrellaFS has little overhead for most file
system operations while providing an ability better to
utilize the differences in Flash and traditional hard
drives. With appropriate use of rules, we have shown
improvements of up to 44\% in certain situations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Device characteristics; flash drives; namespaces;
policy-driven storage",
}
@Article{Mi:2009:EMI,
author = "Ningfang Mi and Alma Riska and Qi Zhang and Evgenia
Smirni and Erik Riedel",
title = "Efficient management of idleness in storage systems",
journal = j-TOS,
volume = "5",
number = "2",
pages = "4:1--4:??",
month = jun,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1534912.1534913",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:46 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Various activities that intend to enhance performance,
reliability, and availability of storage systems are
scheduled with low priority and served during idle
times. Under such conditions, idleness becomes a
valuable ``resource'' that needs to be efficiently
managed. A common approach in system design is to be
nonwork conserving by ``idle waiting'', that is, delay
the scheduling of background jobs to avoid slowing down
upcoming foreground tasks.\par
In this article, we complement ``idle waiting'' with
the ``estimation'' of background work to be served in
every idle interval to effectively manage the trade-off
between the performance of foreground and background
tasks. As a result, the storage system is better
utilized without compromising foreground performance.
Our analysis shows that if idle times have low
variability, then idle waiting is not necessary. Only
if idle times are highly variable does idle waiting
become necessary to minimize the impact of background
activity on foreground performance. We further show
that if there is burstiness in idle intervals, then it
is possible to predict accurately the length of
incoming idle intervals and use this information to
serve more background jobs without affecting foreground
performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "background jobs; continuous data histogram; foreground
jobs; idle periods; idleness; low priority work;
performance guarantee; resource management; storage
systems",
}
@Article{Storer:2009:PSR,
author = "Mark W. Storer and Kevin M. Greenan and Ethan L.
Miller and Kaladhar Voruganti",
title = "{POTSHARDS} --- a secure, recoverable, long-term
archival storage system",
journal = j-TOS,
volume = "5",
number = "2",
pages = "5:1--5:??",
month = jun,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1534912.1534914",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:46 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Users are storing ever-increasing amounts of
information digitally, driven by many factors including
government regulations and the public's desire to
digitally record their personal histories.
Unfortunately, many of the security mechanisms that
modern systems rely upon, such as encryption, are
poorly suited for storing data for indefinitely long
periods of time; it is very difficult to manage keys
and update cryptosystems to provide secrecy through
encryption over periods of decades. Worse, an adversary
who can compromise an archive need only wait for
cryptanalysis techniques to catch up to the encryption
algorithm used at the time of the compromise in order
to obtain ``secure'' data. To address these concerns,
we have developed POTSHARDS, an archival storage system
that provides long-term security for data with very
long lifetimes without using encryption. Secrecy is
achieved by using unconditionally secure secret
splitting and spreading the resulting shares across
separately managed archives. Providing availability and
data recovery in such a system can be difficult; thus,
we use a new technique, approximate pointers, in
conjunction with secure distributed RAID techniques to
provide availability and reliability across independent
archives. To validate our design, we developed a
prototype POTSHARDS implementation. In addition to
providing us with an experimental testbed, this
prototype helped us to understand the design issues
that must be addressed in order to maximize security.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "approximate pointers; Archival storage; secret
splitting",
}
@Article{Bhadkamkar:2009:SSS,
author = "Medha Bhadkamkar and Fernando Farfan and Vagelis
Hristidis and Raju Rangaswami",
title = "Storing semi-structured data on disk drives",
journal = j-TOS,
volume = "5",
number = "2",
pages = "6:1--6:??",
month = jun,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1534912.1534915",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:46 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Applications that manage semi-structured data are
becoming increasingly commonplace. Current approaches
for storing semi-structured data use existing storage
machinery; they either map the data to relational
databases, or use a combination of flat files and
indexes. While employing these existing storage
mechanisms provides readily available solutions, there
is a need to more closely examine their suitability to
this class of data. Particularly, retrofitting existing
solutions for semi-structured data can result in a
mismatch between the tree structure of the data and the
access characteristics of the underlying storage device
(disk drive). This study explores various possibilities
in the design space of native storage solutions for
semi-structured data by exploring alternative
approaches that match application data access
characteristics to those of the underlying disk drive.
For evaluating the effectiveness of the proposed native
techniques in relation to the existing solution, we
experiment with XML data using the XPathMark benchmark.
Extensive evaluation reveals the strengths and
weaknesses of the proposed native data layout
techniques. While the existing solutions work really
well for {\em deep-focused\/} queries into a
semi-structured document (those that result in
retrieving entire subtrees), the proposed native
solutions substantially outperform for the {\em
non-deep-focused\/} queries, which we demonstrate are
at least as important as the deep-focused. We believe
that native data layout techniques offer a unique
direction for improving the performance of
semi-structured data stores for a variety of important
workloads. However, given that the proposed native
techniques require circumventing current storage stack
abstractions, further investigation is warranted before
they can be applied to general-purpose storage
systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Semi-structured data; storage management; XML",
}
@Article{Thomasian:2009:HRR,
author = "Alexander Thomasian and Mario Blaum",
title = "Higher reliability redundant disk arrays:
Organization, operation, and coding",
journal = j-TOS,
volume = "5",
number = "3",
pages = "7:1--7:??",
month = nov,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629075.1629076",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:57 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Parity is a popular form of data protection in {\em
redundant arrays of inexpensive/independent disks
(RAID)}. RAID5 dedicates one out of {\em N\/} disks to
parity to mask single disk failures, that is, the
contents of a block on a failed disk can be
reconstructed by exclusive-ORing the corresponding
blocks on surviving disks. RAID5 can mask a single disk
failure, and it is vulnerable to data loss if a second
disk failure occurs. The RAID5 rebuild process
systematically reconstructs the contents of a failed
disk on a spare disk, returning the system to its
original state, but the rebuild process may be
unsuccessful due to unreadable sectors. This has led to
{\em two disk failure tolerant arrays (2DFTs)}, such as
RAID6 based on Reed--Solomon (RS) codes. EVENODD, RDP
(Row-Diagonal-Parity), the X-code, and RM2 (Row-Matrix)
are 2DFTs with parity coding. RM2 incurs a higher level
of redundancy than two disks, while the X-code is
limited to a prime number of disks. RDP is optimal with
respect to the number of XOR operations at the
encoding, but not for short write operations. For small
symbol sizes EVENODD and RDP have the same disk access
pattern as RAID6, while RM2 and the X-code incur a high
recovery cost with two failed disks. We describe
variations to RAID5 and RAID6 organizations, including
clustered RAID, different methods to update parities,
rebuild processing, disk scrubbing to eliminate sector
errors, and the {\em intra-disk redundancy (IDR)\/}
method to deal with sector errors. We summarize the
results of recent studies of failures in hard disk
drives. We describe Markov chain reliability models to
estimate RAID {\em mean time to data loss (MTTDL)\/}
taking into account sector errors and the effect of
disk scrubbing. Numerical results show that RAID5 plus
IDR attains the same MTTDL level as RAID6, while
incurring a lower performance penalty. We conclude with
a survey of analytic and simulation studies of RAID
performance and tools and benchmarks for RAID
performance evaluation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Disk array; disk failure studies; performance
evaluation; RAID; reliability evaluation",
}
@Article{Tosun:2009:DCS,
author = "Ali {\c{S}}aman Tosun",
title = "Divide-and-conquer scheme for strictly optimal
retrieval of range queries",
journal = j-TOS,
volume = "5",
number = "3",
pages = "8:1--8:??",
month = nov,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629075.1629077",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:57 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Declustering distributes data among parallel disks to
reduce retrieval cost using I/O parallelism. Many
schemes were proposed for single copy declustering of
spatial data. Recently, declustering using replication
gained a lot of interest and several schemes with
different properties were proposed. It is
computationally expensive to verify optimality of
replication schemes designed for range queries and
existing schemes verify optimality for up to 50 disks.
In this article, we propose a novel method to find
replicated declustering schemes that render all spatial
range queries optimal. The proposed scheme uses
threshold based declustering, divisibility of large
queries for optimization and optimistic approach to
compute maximum flow. The proposed scheme is generic
and works for any number of dimensions. Experimental
results show that using 3 copies there exist
allocations that render all spatial range queries
optimal for up to 750 disks in 2 dimensions and with
the exception of several values for up to 100 disks in
3 dimensions. The proposed scheme improves search for
strictly optimal replicated declustering schemes
significantly and will be a valuable tool to answer
open problems on replicated declustering.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Declustering; number theory; parallel I/O;
replication; spatial range query; threshold",
}
@Article{Qin:2009:DLB,
author = "Xiao Qin and Hong Jiang and Adam Manzanares and
Xiaojun Ruan and Shu Yin",
title = "Dynamic load balancing for {I/O}-intensive
applications on clusters",
journal = j-TOS,
volume = "5",
number = "3",
pages = "9:1--9:??",
month = nov,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629075.1629078",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:57 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Load balancing for clusters has been investigated
extensively, mainly focusing on the effective usage of
global CPU and memory resources. However, previous CPU-
or memory-centric load balancing schemes suffer
significant performance drop under I/O-intensive
workloads due to the imbalance of I/O load. To solve
this problem, we propose two simple yet effective
I/O-aware load-balancing schemes for two types of
clusters: (1) homogeneous clusters where nodes are
identical and (2) heterogeneous clusters, which are
comprised of a variety of nodes with different
performance characteristics in computing power, memory
capacity, and disk speed. In addition to assigning
I/O-intensive sequential and parallel jobs to nodes
with light I/O loads, the proposed schemes judiciously
take into account both CPU and memory load sharing in
the system. Therefore, our schemes are able to maintain
high performance for a wide spectrum of workloads. We
develop analytic models to study mean slowdowns, task
arrival, and transfer processes in system levels. Using
a set of real I/O-intensive parallel applications and
synthetic parallel jobs with various I/O
characteristics, we show that our proposed schemes
consistently improve the performance over existing
non-I/O-aware load-balancing schemes, including CPU-
and Memory-aware schemes and a PBS-like batch scheduler
for parallel and sequential jobs, for a diverse set of
workload conditions. Importantly, this performance
improvement becomes much more pronounced when the
applications are I/O-intensive. For example, the
proposed approaches deliver 23.6--88.0\% performance
improvements for I/O-intensive applications such as LU
decomposition, Sparse Cholesky, Titan, Parallel text
searching, and Data Mining. When I/O load is low or
well balanced, the proposed schemes are capable of
maintaining the same level of performance as the
existing non-I/O-aware schemes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "clusters; heterogeneity; I/O-intensive applications;
Load balancing; storage systems",
}
@Article{Xie:2009:FAS,
author = "Tao Xie and Yao Sun",
title = "A file assignment strategy independent of workload
characteristic assumptions",
journal = j-TOS,
volume = "5",
number = "3",
pages = "10:1--10:??",
month = nov,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629075.1629079",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:33:57 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The problem of statically assigning nonpartitioned
files in a parallel I/O system has been extensively
investigated. A basic workload characteristic
assumption of most existing solutions to the problem is
that there exists a strong inverse correlation between
file access frequency and file size. In other words,
the most popular files are typically small in size,
while the large files are relatively unpopular. Recent
studies on the characteristics of Web proxy traces
suggested, however, the correlation, if any, is so weak
that it can be ignored. Hence, the following two
questions arise naturally. First, can existing
algorithms still perform well when the workload
assumption does not hold? Second, if not, can one
develop a new file assignment strategy that is immune
to the workload assumption? To answer these questions,
we first evaluate the performance of three well-known
file assignment algorithms with and without the
workload assumption, respectively. Next, we develop a
novel static nonpartitioned file assignment strategy
for parallel I/O systems, called static round-robin
(SOR), which is immune to the workload assumption.
Comprehensive experimental results show that SOR
consistently improves the performance in terms of mean
response time over the existing schemes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "File assignment; load balancing; parallel I/O;
workload characteristics; Zipfian distribution",
}
@Article{Seltzer:2009:ISI,
author = "Margo Seltzer and Ric Wheeler",
title = "Introduction to special issue {FAST 2009}",
journal = j-TOS,
volume = "5",
number = "4",
pages = "11:1--11:??",
month = dec,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629080.1629081",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:34:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hasan:2009:PHF,
author = "Ragib Hasan and Radu Sion and Marianne Winslett",
title = "Preventing history forgery with secure provenance",
journal = j-TOS,
volume = "5",
number = "4",
pages = "12:1--12:??",
month = dec,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629080.1629082",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:34:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "As increasing amounts of valuable information are
produced and persist digitally, the ability to
determine the origin of data becomes important. In
science, medicine, commerce, and government, data
provenance tracking is essential for rights protection,
regulatory compliance, management of intelligence and
medical data, and authentication of information as it
flows through workplace tasks. While significant
research has been conducted in this area, the
associated security and privacy issues have not been
explored, leaving provenance information vulnerable to
illicit alteration as it passes through untrusted
environments.\par
In this article, we show how to provide strong
integrity and confidentiality assurances for data
provenance information at the kernel, file system, or
application layer. We describe Sprov, our
provenance-aware system prototype that implements
provenance tracking of data writes at the application
layer, which makes Sprov extremely easy to deploy. We
present empirical results that show that, for real-life
workloads, the runtime overhead of Sprov for recording
provenance with confidentiality and integrity
guarantees ranges from 1\% to 13\%, when all file
modifications are recorded, and from 12\% to 16\%, when
all file read and modifications are tracked.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "audit; confidentiality; integrity; lineage;
provenance; security",
}
@Article{Muniswamy-Reddy:2009:CBV,
author = "Kiran-Kumar Muniswamy-Reddy and David A. Holland",
title = "Causality-based versioning",
journal = j-TOS,
volume = "5",
number = "4",
pages = "13:1--13:??",
month = dec,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629080.1629083",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:34:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Versioning file systems provide the ability to recover
from a variety of failures, including file corruption,
virus and worm infestations, and user mistakes.
However, using versions to recover from data-corrupting
events requires a human to determine precisely which
files and versions to restore. We can create more
meaningful versions and enhance the value of those
versions by capturing the causal connections among
files, facilitating selection and recovery of precisely
the right versions after data-corrupting events.\par
We determine when to create new versions of files
automatically using the causal relationships among
files. The literature on versioning file systems
usually examines two extremes of possible
version-creation algorithms: open-to-close versioning
and versioning on every write. We evaluate causal
versions of these two algorithms and introduce two
additional causality-based algorithms: Cycle-Avoidance
and Graph-Finesse.\par
We show that capturing and maintaining causal
relationships imposes less than 7\% overhead on a
versioning system, providing benefit at low cost. We
then show that Cycle-Avoidance provides more meaningful
versions of files created during concurrent program
execution, with overhead comparable to open/close
versioning. Graph-Finesse provides even greater
control, frequently at comparable overhead, but
sometimes at unacceptable overhead. Versioning on every
write is an interesting extreme case, but is far too
costly to be useful in practice.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "causality; data provenance",
}
@Article{Vrable:2009:CFB,
author = "Michael Vrable and Stefan Savage and Geoffrey M.
Voelker",
title = "{Cumulus}: Filesystem backup to the cloud",
journal = j-TOS,
volume = "5",
number = "4",
pages = "14:1--14:??",
month = dec,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629080.1629084",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:34:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Cumulus is a system for efficiently implementing
filesystem backups over the Internet, specifically
designed under a {\em thin cloud\/} assumption --- that
the remote datacenter storing the backups does not
provide any special backup services, but only a
least-common-denominator storage interface. Cumulus
aggregates data from small files for storage and uses
LFS-inspired segment cleaning to maintain storage
efficiency. While Cumulus can use virtually any storage
service, we show its efficiency is comparable to
integrated approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Backup; cloud storage",
}
@Article{Batsakis:2009:CNC,
author = "Alexandros Batsakis and Randal Burns and Arkady
Kanevsky and James Lentini and Thomas Talpey",
title = "{CA-NFS}: a congestion-aware network file system",
journal = j-TOS,
volume = "5",
number = "4",
pages = "15:1--15:??",
month = dec,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629080.1629085",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:34:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We develop a holistic framework for adaptively
scheduling asynchronous requests in distributed file
systems. The system is holistic in that it manages all
resources, including network bandwidth, server I/O,
server CPU, and client and server memory utilization.
It accelerates, defers, or cancels asynchronous
requests in order to improve application-perceived
performance directly. We employ congestion pricing via
online auctions to coordinate the use of system
resources by the file system clients so that they can
detect shortages and adapt their resource usage. We
implement our modifications in the Congestion-Aware
Network File System (CA-NFS), an extension to the
ubiquitous network file system (NFS). Our experimental
result shows that CA-NFS results in a 20\% improvement
in execution times when compared with NFS for a variety
of workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "congestion; File systems; NFS; performance;
scalability",
}
@Article{Agrawal:2009:GRI,
author = "Nitin Agrawal and Andrea C. Arpaci-Dusseau and Remzi
H. Arpaci-Dusseau",
title = "Generating realistic {{\em Impressions\/}} for
file-system benchmarking",
journal = j-TOS,
volume = "5",
number = "4",
pages = "16:1--16:??",
month = dec,
year = "2009",
CODEN = "????",
DOI = "https://doi.org/10.1145/1629080.1629086",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 16 15:34:12 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The performance of file systems and related software
depends on characteristics of the underlying
file-system image (i.e., file-system metadata and file
contents). Unfortunately, rather than benchmarking with
realistic file-system images, most system designers and
evaluators rely on {\em ad hoc\/} assumptions and
(often inaccurate) rules of thumb. Furthermore, the
lack of standardization and reproducibility makes
file-system benchmarking ineffective. To remedy these
problems, we develop Impressions, a framework to
generate statistically accurate file-system images with
realistic metadata and content. Impressions is
flexible, supporting user-specified constraints on
various file-system parameters using a number of
statistical techniques to generate consistent images.
In this article, we present the design, implementation,
and evaluation of Impressions and demonstrate its
utility using desktop search as a case study. We
believe Impressions will prove to be useful to system
developers and users alike.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "File and storage system benchmarking",
}
@Article{Khatib:2010:OMB,
author = "Mohammed G. Khatib and Pieter H. Hartel",
title = "Optimizing {MEMS}-based storage devices for mobile
battery-powered systems",
journal = j-TOS,
volume = "6",
number = "1",
pages = "1:1--1:??",
month = mar,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1714454.1714455",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 14 17:04:28 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "An emerging storage technology, called MEMS-based
storage, promises nonvolatile storage devices with
ultrahigh density, high rigidity, a small form factor,
and low cost. For these reasons, MEMS-based storage
devices are suitable for battery-powered mobile systems
such as PDAs. For deployment in such systems,
MEMS-based storage devices must consume little energy.
This work mainly targets reducing the energy
consumption of this class of devices.\par
We derive the operation modes of a MEMS-based storage
device and systemically devise a policy in each mode
for energy saving. Three types of policies are
presented: power management, shutdown, and data-layout
policy. Combined, these policies reduce the total
energy consumed by a MEMS-based storage device. A
MEMS-based storage device that enforces these policies
comes close to Flash with respect to energy consumption
and response time. However, enhancement on the device
level is still needed; we present some suggestions to
resolve this issue.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "design space; energy efficiency; green storage; mobile
systems; Probe storage",
}
@Article{Yu:2010:NVS,
author = "Young Jin Yu and Dong In Shin and Hyeonsang Eom and
Heon Young Yeom",
title = "{NCQ} vs. {I/O} scheduler: Preventing unexpected
misbehaviors",
journal = j-TOS,
volume = "6",
number = "1",
pages = "2:1--2:??",
month = mar,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1714454.1714456",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 14 17:04:28 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Native Command Queueing (NCQ) is an optimization
technology to maximize throughput by reordering
requests inside a disk drive. It has been so successful
that NCQ has become the standard in SATA 2 protocol
specification, and the great majority of disk vendors
have adopted it for their recent disks. However, there
is a possibility that the technology may lead to an
information gap between the OS and a disk drive. An
NCQ-enabled disk tries to optimize throughput without
realizing the intention of an OS, whereas the OS does
its best under the assumption that the disk will do as
it is told without specific knowledge regarding the
details of the disk mechanism. Let us call this {\em
expectation discord}, which may cause serious problems
such as request starvations or performance anomaly. In
this article, we (1) confirm that {\em expectation
discord\/} actually occurs in real systems; (2) propose
software-level approaches to solve them; and (3)
evaluate our mechanism. Experimental results show that
our solution is simple, cheap (no special hardware
required), portable, and effective.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "hybrid scheduling; I/O prioritization; NCQ; SATA 2;
starvation detection",
}
@Article{Jung:2010:FES,
author = "Jaemin Jung and Youjip Won and Eunki Kim and Hyungjong
Shin and Byeonggil Jeon",
title = "{FRASH}: Exploiting storage class memory in hybrid
file system for hierarchical storage",
journal = j-TOS,
volume = "6",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1714454.1714457",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 14 17:04:28 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In this work, we develop a novel hybrid file system,
FRASH, for storage-class memory and NAND Flash. Despite
the promising physical characteristics of storage-class
memory, its scale is an order of magnitude smaller than
the current storage device scale. This fact makes it
less than desirable for use as an independent storage
device. We carefully analyze in-memory and on-disk file
system objects in a log-structured file system, and
exploit memory and storage aspects of the storage-class
memory to overcome the drawbacks of the current
log-structured file system. FRASH provides a hybrid
view of storage-class memory. It harbors an in-memory
data structure as well as an on-disk structure. It provides
nonvolatility to key data structures which have been
maintained in-memory in a legacy log-structured file
system. This approach greatly improves the mount
latency and effectively resolves the robustness issue.
By maintaining on-disk structure in storage-class
memory, FRASH provides byte-addressability to the file
system object and metadata for page, and subsequently
greatly improves the I/O performance compared to the
legacy log-structured approach. While storage-class
memory offers byte granularity, it is still far slower
than its DRAM counterpart. We develop a copy-on-mount
technique to overcome the access latency difference
between main memory and storage-class memory. Our file
system was able to reduce the mount time by 92\% and
file system I/O performance was increased by 16\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Flash storage; log-structured file system",
}
@Article{Balakrishnan:2010:DRR,
author = "Mahesh Balakrishnan and Asim Kadav and Vijayan
Prabhakaran and Dahlia Malkhi",
title = "Differential {RAID}: Rethinking {RAID} for {SSD}
reliability",
journal = j-TOS,
volume = "6",
number = "2",
pages = "4:1--4:??",
month = jul,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1807060.1807061",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 14 17:04:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "SSDs exhibit very different failure characteristics
compared to hard drives. In particular, the bit error
rate (BER) of an SSD climbs as it receives more writes.
As a result, RAID arrays composed from SSDs are subject
to correlated failures. By balancing writes evenly
across the array, RAID schemes can wear out devices at
similar times. When a device in the array fails towards
the end of its lifetime, the high BER of the remaining
devices can result in data loss. We propose Diff-RAID,
a parity-based redundancy solution that creates an age
differential in an array of SSDs. Diff-RAID distributes
parity blocks unevenly across the array, leveraging
their higher update rate to age devices at different
rates. To maintain this age differential when old
devices are replaced by new ones, Diff-RAID reshuffles
the parity distribution on each drive replacement. We
evaluate Diff-RAID's reliability by using real BER data
from 12 flash chips on a simulator and show that it is
more reliable than RAID-5, in some cases by multiple
orders of magnitude. We also evaluate Diff-RAID's
performance using a software implementation on a
5-device array of 80 GB Intel X25-M SSDs and show that
it offers a trade-off between throughput and
reliability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "flash; RAID; SSD",
}
@Article{Chang:2010:SEN,
author = "Yuan-Hao Chang and Jian-Hong Lin and Jen-Wei Hsieh and
Tei-Wei Kuo",
title = "A strategy to emulate {NOR} flash with {NAND} flash",
journal = j-TOS,
volume = "6",
number = "2",
pages = "5:1--5:??",
month = jul,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1807060.1807062",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 14 17:04:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "This work is motivated by a strong market demand for
the replacement of NOR flash memory with NAND flash
memory to cut down the cost of many embedded-system
designs, such as mobile phones. Different from
LRU-related caching or buffering studies, we are
interested in prediction-based prefetching based on
given execution traces of application executions. An
implementation strategy is proposed for the storage of
the prefetching information with limited SRAM and
run-time overheads. An efficient prediction procedure
is presented based on information extracted from
application executions to reduce the performance gap
between NAND flash memory and NOR flash memory in
reads. With the behavior of a target application
extracted from a set of collected traces, we show that
data access to NOR flash memory can respond effectively
over the proposed implementation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "data prefetching; flash memory; NAND; NOR",
}
@Article{Gim:2010:EIQ,
author = "Jongmin Gim and Youjip Won",
title = "Extract and infer quickly: Obtaining sector geometry
of modern hard disk drives",
journal = j-TOS,
volume = "6",
number = "2",
pages = "6:1--6:??",
month = jul,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1807060.1807063",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 14 17:04:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The modern hard disk drive is a complex and
complicated device. It consists of 2--4 heads,
thousands of sectors per track, several hundred
thousands of tracks, and tens of zones. The beginnings
of adjacent tracks are placed with a certain angular
offset. Sectors are placed on the tracks and accessed
in some order. Angular offset and sector placement
order vary widely subject to vendors and models. The
success of an efficient file and storage subsystem
design relies on the proper understanding of the
underlying storage device characteristics. The
characterization of hard disk drives has been a subject
of intense research for more than a decade. The scale
and complexity of state-of-the-art hard disk drive
technology calls for a new way of extracting and
analyzing the characteristics of the hard disk drive.
In this work, we develop a novel disk characterization
suite, DIG (Disk Geometry Analyzer), which allows us to
rapidly extract and characterize the key performance
metrics of the modern hard disk drive. Development of
this tool is accompanied by thorough examination of
four off-the-shelf hard disk drives. DIG consists of
three key ingredients: an $ O(1) $ track boundary
detection algorithm; an $ O(\log n) $ zone boundary
detection algorithm; and hybrid sampling based seek
time profiling. We particularly focus on addressing the
scalability aspect of disk characterization. With DIG,
we are able to extract key metrics of hard disk drives,
for example, track sizes, zone information, sector
geometry and so on, within 3--20 minutes. DIG allows us
to determine the sector layout mechanism of the
underlying hard disk drive, for example, hybrid
serpentine, cylinder serpentine, and surface
serpentine, and to build a complete sector map from LBN
to the three dimensional space of (Cylinder, Head,
Sector). Examining the hard disk drives with DIG, we
made a number of important observations. In modern hard
disk drives, head switch overhead is far greater than
track switch overhead. It seems that hard disk drive
vendors put greater emphasis on reducing the number of
head switches for data access. Most disk vendors use
surface serpentine, cylinder serpentine, or hybrid
serpentine schemes in laying sectors on the platters.
The legacy seek time model, which takes the form of $ a
+ b \sqrt d $, leaves much to be desired for use in
modern hard disk drives especially for short seeks
(less than 5000 tracks). We compare the performance of
the DIG against the existing state-of-the-art disk
profiling algorithm. Compared to the existing
state-of-the-art disk characterization algorithm, the
DIG algorithm significantly decreases the time to
extract comprehensive sector geometry information from
1920 minutes to 7 minutes and 1927 minutes to 180
minutes in best and worst case scenarios,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Hard disk; performance characterization; sector
geometry; seek time; track skew; zone",
}
@Article{Wang:2010:SSO,
author = "Yang Wang and Jiwu Shu and Guangyan Zhang and Wei Xue
and Weimin Zheng",
title = "{SOPA}: Selecting the optimal caching policy
adaptively",
journal = j-TOS,
volume = "6",
number = "2",
pages = "7:1--7:??",
month = jul,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1807060.1807064",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 14 17:04:39 MDT 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "With the development of storage technology and
applications, new caching policies are continuously
being introduced. It becomes increasingly important for
storage systems to be able to select the matched
caching policy dynamically under varying workloads.
This article proposes SOPA, a cache framework to
adaptively select the matched policy and perform policy
switches in storage systems. SOPA encapsulates the
functions of a caching policy into a module, and
enables online policy switching by policy
reconstruction. SOPA then selects the policy matched
with the workload dynamically by collecting and
analyzing access traces. To reduce the decision-making
cost, SOPA proposes an asynchronous decision-making
process. The simulation experiments show that no single
caching policy performed well under all of the
different workloads. With SOPA, a storage system could
select the appropriate policy for different workloads.
The real-system evaluation results show that SOPA
reduced the average response time by up to 20.3\% and
11.9\% compared with LRU and ARC, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
keywords = "Caching policies; policy adaptation; policy switch",
}
@Article{Burns:2010:GEF,
author = "Randal Burns and Kimberly Keeton",
title = "Guest editorial: {FAST'10}",
journal = j-TOS,
volume = "6",
number = "3",
pages = "8:1--8:??",
month = sep,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1837915.1837916",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 23 10:40:15 MST 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Schroeder:2010:ULS,
author = "Bianca Schroeder and Sotirios Damouras and Phillipa
Gill",
title = "Understanding latent sector errors and how to protect
against them",
journal = j-TOS,
volume = "6",
number = "3",
pages = "9:1--9:??",
month = sep,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1837915.1837917",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 23 10:40:15 MST 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Sehgal:2010:OEP,
author = "Priya Sehgal and Vasily Tarasov and Erez Zadok",
title = "Optimizing energy and performance for server-class
file system workloads",
journal = j-TOS,
volume = "6",
number = "3",
pages = "10:1--10:??",
month = sep,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1837915.1837918",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 23 10:40:15 MST 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Sundararaman:2010:MOS,
author = "Swaminathan Sundararaman and Sriram Subramanian and
Abhishek Rajimwale and Andrea C. Arpaci-Dusseau and
Remzi H. Arpaci-Dusseau and Michael M. Swift",
title = "{Membrane}: {Operating} system support for restartable
file systems",
journal = j-TOS,
volume = "6",
number = "3",
pages = "11:1--11:??",
month = sep,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1837915.1837919",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 23 10:40:15 MST 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Veeraraghavan:2010:QRF,
author = "Kaushik Veeraraghavan and Jason Flinn and Edmund B.
Nightingale and Brian Noble",
title = "{quFiles}: {The} right file at the right time",
journal = j-TOS,
volume = "6",
number = "3",
pages = "12:1--12:??",
month = sep,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1837915.1837920",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 23 10:40:15 MST 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Koller:2010:DUC,
author = "Ricardo Koller and Raju Rangaswami",
title = "{I/O Deduplication}: {Utilizing} content similarity to
improve {I/O} performance",
journal = j-TOS,
volume = "6",
number = "3",
pages = "13:1--13:??",
month = sep,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1837915.1837921",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 23 10:40:15 MST 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Josephson:2010:DFS,
author = "William K. Josephson and Lars A. Bongo and Kai Li and
David Flynn",
title = "{DFS}: a file system for virtualized flash storage",
journal = j-TOS,
volume = "6",
number = "3",
pages = "14:1--14:??",
month = sep,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1837915.1837922",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 23 10:40:15 MST 2010",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Shim:2011:HFT,
author = "Gyudong Shim and Youngwoo Park and Kyu Ho Park",
title = "A hybrid flash translation layer with adaptive merge
for {SSDs}",
journal = j-TOS,
volume = "6",
number = "4",
pages = "15:1--15:??",
month = may,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970338.1970339",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Jun 7 18:40:46 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Plank:2011:MDR,
author = "James S. Plank and Adam L. Buchsbaum and Bradley T.
{Vander Zanden}",
title = "Minimum density {RAID-6} codes",
journal = j-TOS,
volume = "6",
number = "4",
pages = "16:1--16:??",
month = may,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970338.1970340",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Jun 7 18:40:46 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Tian:2011:OAU,
author = "Lei Tian and Qiang Cao and Hong Jiang and Dan Feng and
Changsheng Xie and Qin Xin",
title = "Online availability upgrades for parity-based {RAIDs}
through supplementary parity augmentations",
journal = j-TOS,
volume = "6",
number = "4",
pages = "17:1--17:??",
month = may,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970338.1970341",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Jun 7 18:40:46 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "17",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chang:2011:DLC,
author = "Yuan-Hao Chang and Ping-Yi Hsu and Yung-Feng Lu and
Tei-Wei Kuo",
title = "A driver-layer caching policy for removable storage
devices",
journal = j-TOS,
volume = "7",
number = "1",
pages = "1:1--1:??",
month = jun,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970343.1970344",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Oct 22 09:33:53 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Tomazic:2011:FFE,
author = "Saso Tomazic and Vesna Pavlovic and Jasna Milovanovic
and Jaka Sodnik and Anton Kos and Sara Stancin and
Veljko Milutinovic",
title = "Fast file existence checking in archiving systems",
journal = j-TOS,
volume = "7",
number = "1",
pages = "2:1--2:??",
month = jun,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970343.1970345",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Oct 22 09:33:53 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Manzanares:2011:PBP,
author = "Adam Manzanares and Xiao Qin and Xiaojun Ruan and Shu
Yin",
title = "{PRE-BUD}: {Prefetching} for energy-efficient parallel
{I/O} systems with buffer disks",
journal = j-TOS,
volume = "7",
number = "1",
pages = "3:1--3:??",
month = jun,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970343.1970346",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Oct 22 09:33:53 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Shin:2011:RBI,
author = "Dong In Shin and Young Jin Yu and Hyeong S. Kim and
Hyeonsang Eom and Heon Young Yeom",
title = "Request Bridging and Interleaving: Improving the
Performance of Small Synchronous Updates under
Seek-Optimizing Disk Subsystems",
journal = j-TOS,
volume = "7",
number = "2",
pages = "4:1--4:??",
month = jul,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970348.1970349",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Oct 22 09:33:54 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Iliadis:2011:DSV,
author = "Ilias Iliadis and Robert Haas and Xiao-Yu Hu and
Evangelos Eleftheriou",
title = "Disk Scrubbing Versus Intradisk Redundancy for {RAID}
Storage Systems",
journal = j-TOS,
volume = "7",
number = "2",
pages = "5:1--5:??",
month = jul,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970348.1970350",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Oct 22 09:33:54 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{You:2011:PFE,
author = "Lawrence L. You and Kristal T. Pollack and Darrell D.
E. Long and K. Gopinath",
title = "{PRESIDIO}: a Framework for Efficient Archival Data
Storage",
journal = j-TOS,
volume = "7",
number = "2",
pages = "6:1--6:??",
month = jul,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1970348.1970351",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Oct 22 09:33:54 MDT 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Brinkmann:2011:GE,
author = "Andr{\'e} Brinkmann and David Pease",
title = "Guest Editorial",
journal = j-TOS,
volume = "7",
number = "3",
pages = "7:1--7:??",
month = oct,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2027066.2027067",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Nov 6 06:42:42 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Carns:2011:UIC,
author = "Philip Carns and Kevin Harms and William Allcock and
Charles Bacon and Samuel Lang and Robert Latham and
Robert Ross",
title = "Understanding and Improving Computational Science
Storage Access through Continuous Characterization",
journal = j-TOS,
volume = "7",
number = "3",
pages = "8:1--8:??",
month = oct,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2027066.2027068",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Nov 6 06:42:42 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhang:2011:YCY,
author = "Xuechen Zhang and Yuehai Xu and Song Jiang",
title = "{YouChoose}: Choosing your Storage Device as a
Performance Interface to Consolidated {I/O} Service",
journal = j-TOS,
volume = "7",
number = "3",
pages = "9:1--9:??",
month = oct,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2027066.2027069",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Nov 6 06:42:42 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Huang:2011:RRT,
author = "Zhen Huang and Ernst Biersack and Yuxing Peng",
title = "Reducing Repair Traffic in {P2P} Backup Systems: Exact
Regenerating Codes on Hierarchical Codes",
journal = j-TOS,
volume = "7",
number = "3",
pages = "10:1--10:??",
month = oct,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2027066.2027070",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Nov 6 06:42:42 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Xiang:2011:HAF,
author = "Liping Xiang and Yinlong Xu and John C. S. Lui and
Qian Chang and Yubiao Pan and Runhui Li",
title = "A Hybrid Approach to Failed Disk Recovery Using
{RAID-6} Codes: Algorithms and Performance Evaluation",
journal = j-TOS,
volume = "7",
number = "3",
pages = "11:1--11:??",
month = oct,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/2027066.2027071",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Nov 6 06:42:42 MST 2011",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Agrawal:2012:EGS,
author = "Nitin Agrawal and Leo Arulraj and Andrea C.
Arpaci-Dusseau and Remzi H. Arpaci-Dusseau",
title = "Emulating {Goliath} storage systems with {David}",
journal = j-TOS,
volume = "7",
number = "4",
pages = "12:1--12:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2078861.2078862",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 16 15:48:58 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Benchmarking file and storage systems on large
file-system images is important, but difficult and
often infeasible. Typically, running benchmarks on such
large disk setups is a frequent source of frustration
for file-system evaluators; the scale alone acts as a
strong deterrent against using larger, albeit
realistic, benchmarks. To address this problem, we
develop David: a system that makes it practical to run
large benchmarks using modest amount of storage or
memory capacities readily available on most computers.
David creates a `compressed' version of the original
file-system image by omitting all file data and laying
out metadata more efficiently; an online storage model
determines the runtime of the benchmark workload on the
original uncompressed image. David works under any file
system, as demonstrated in this article with ext3 and
btrfs. We find that David reduces storage requirements
by orders of magnitude; David is able to emulate a 1-TB
target workload using only an 80 GB available disk,
while still modeling the actual runtime accurately.
David can also emulate newer or faster devices, for
example, we show how David can effectively emulate a
multidisk RAID using a limited amount of memory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Sundararaman:2012:MCC,
author = "Swaminathan Sundararaman and Yupu Zhang and Sriram
Subramanian and Andrea C. Arpaci-Dusseau and Remzi H.
Arpaci-Dusseau",
title = "Making the common case the only case with anticipatory
memory allocation",
journal = j-TOS,
volume = "7",
number = "4",
pages = "13:1--13:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2078861.2078863",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 16 15:48:58 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We present anticipatory memory allocation (AMA), a new
method to build kernel code that is robust to
memory-allocation failures. AMA avoids the usual
difficulties in handling allocation failures through a
novel combination of static and dynamic techniques.
Specifically, a developer, with assistance from AMA
static analysis tools, determines how much memory a
particular call into a kernel subsystem will need, and
then preallocates said amount immediately upon entry to
the kernel; subsequent allocation requests are serviced
from the preallocated pool and thus guaranteed never to
fail. We describe the static and runtime components of
AMA, and then present a thorough evaluation of Linux
ext2-mfr, a case study in which we transform the Linux
ext2 file system into a memory-failure robust version
of itself. Experiments reveal that ext2-mfr avoids
memory-allocation failures successfully while incurring
little space or time overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Meyer:2012:SPD,
author = "Dutch T. Meyer and William J. Bolosky",
title = "A study of practical deduplication",
journal = j-TOS,
volume = "7",
number = "4",
pages = "14:1--14:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2078861.2078864",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 16 15:48:58 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We collected file system content data from 857 desktop
computers at Microsoft over a span of 4 weeks. We
analyzed the data to determine the relative efficacy of
data deduplication, particularly considering whole-file
versus block-level elimination of redundancy. We found
that whole-file deduplication achieves about three
quarters of the space savings of the most aggressive
block-level deduplication for storage of live file
systems, and 87\% of the savings for backup images. We
also studied file fragmentation, finding that it is not
prevalent, and updated prior file system metadata
studies, finding that the distribution of file sizes
continues to skew toward very large unstructured
files.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wu:2012:AWB,
author = "Guanying Wu and Xubin He and Ben Eckart",
title = "An adaptive write buffer management scheme for
flash-based {SSDs}",
journal = j-TOS,
volume = "8",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2093139.2093140",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 16 15:48:59 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Solid State Drives (SSD's) have shown promise to be a
candidate to replace traditional hard disk drives. The
benefits of SSD's over HDD's include better durability,
higher performance, and lower power consumption, but
due to certain physical characteristics of NAND flash,
which comprise SSD's, there are some challenging areas
of improvement and further research. We focus on the
layout and management of the small amount of RAM that
serves as a cache between the SSD and the system that
uses it. Of the techniques that have previously been
proposed to manage this cache, we identify several
sources of inefficient cache space management due to
the way pages are clustered in blocks and the limited
replacement policy. We find that in many traces hot
pages reside in otherwise cold blocks, and that the
spatial locality of most clusters can be fully
exploited in a limited time period, so we develop a
hybrid page/block architecture along with an advanced
replacement policy, called BPAC, or Block-Page Adaptive
Cache, to exploit both temporal and spatial locality.
Our technique involves adaptively partitioning the SSD
on-disk cache to separately hold pages with high
temporal locality in a page list and clusters of pages
with low temporal but high spatial locality in a block
list. In addition, we have developed a novel mechanism
for flash-based SSD's to characterize the spatial
locality of the disk I/O workload and an approach to
dynamically identify the set of low spatial locality
clusters. We run trace-driven simulations to verify our
design and find that it outperforms other popular
flash-aware cache schemes under different workloads.
For instance, compared to a popular flash aware cache
algorithm BPLRU, BPAC reduces the number of cache
evictions by up to 79.6\% and 34\% on average.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Luo:2012:ESI,
author = "Jianqiang Luo and Kevin D. Bowers and Alina Oprea and
Lihao Xu",
title = "Efficient software implementations of large finite
fields {$ {\rm GF}(2^n) $} for secure storage
applications",
journal = j-TOS,
volume = "8",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2093139.2093141",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 16 15:48:59 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Finite fields are widely used in constructing
error-correcting codes and cryptographic algorithms. In
practice, error-correcting codes use small finite
fields to achieve high-throughput encoding and
decoding. Conversely, cryptographic systems employ
considerably larger finite fields to achieve high
levels of security. We focus on developing efficient
software implementations of arithmetic operations in
reasonably large finite fields as needed by secure
storage applications. In this article, we study several
arithmetic operation implementations for finite fields
ranging from $ {\rm GF}(2^{32}) $ to $ {\rm
GF}(2^{128}) $. We implement multiplication and
division in these finite fields by making use of
precomputed tables in smaller fields, and several
techniques of extending smaller field arithmetic into
larger field operations. We show that by exploiting
known techniques, as well as new optimizations, we are
able to efficiently support operations over finite
fields of interest. We perform a detailed evaluation of
several techniques, and show that we achieve very
practical performance for both multiplication and
division. Finally, we show how these techniques find
applications in the implementation of HAIL, a highly
available distributed cloud storage layer. Using the
newly implemented arithmetic operations in $ {\rm
GF}(2^{64}) $, HAIL improves its performance by a
factor of two, while simultaneously providing a higher
level of security.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chang:2012:COM,
author = "Yuan-Hao Chang and Cheng-Kang Hsieh and Po-Chun Huang
and Pi-Cheng Hsiu",
title = "A caching-oriented management design for the
performance enhancement of solid-state drives",
journal = j-TOS,
volume = "8",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2093139.2093142",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 16 15:48:59 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "While solid-state drives are excellent alternatives to
hard disks in mobile devices, a number of performance
and reliability issues need to be addressed. In this
work, we design an efficient flash management scheme
for the performance improvement of low-cost MLC flash
memory devices. Specifically, we design an efficient
flash management scheme for multi-chipped flash memory
devices with cache support, and develop a two-level
address translation mechanism with an adaptive caching
policy. We evaluated the approach on real workloads.
The results demonstrate that it can improve the
performance of multi-chipped solid-state drives through
logical-to-physical mappings and concurrent accesses to
flash chips.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Mao:2012:HHP,
author = "Bo Mao and Hong Jiang and Suzhen Wu and Lei Tian and
Dan Feng and Jianxi Chen and Lingfang Zeng",
title = "{HPDA}: a hybrid parity-based disk array for enhanced
performance and reliability",
journal = j-TOS,
volume = "8",
number = "1",
pages = "4:1--4:??",
month = feb,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2093139.2093143",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 16 15:48:59 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Flash-based Solid State Drive (SSD) has been
productively shipped and deployed in large scale
storage systems. However, a single flash-based SSD
cannot satisfy the capacity, performance and
reliability requirements of the modern storage systems
that support increasingly demanding data-intensive
computing applications. Applying RAID schemes to SSDs
to meet these requirements, while a logical and viable
solution, faces many challenges. In this article, we
propose a Hybrid Parity-based Disk Array architecture
(short for HPDA), which combines a group of SSDs and
two hard disk drives (HDDs) to improve the performance
and reliability of SSD-based storage systems. In HPDA,
the SSDs (data disks) and part of one HDD (parity disk)
compose a RAID4 disk array. Meanwhile, a second HDD and
the free space of the parity disk are mirrored to form
a RAID1-style write buffer that temporarily absorbs the
small write requests and acts as a surrogate set during
recovery when a disk fails. The write data is reclaimed
to the data disks during the lightly loaded or idle
periods of the system. Reliability analysis shows that
the reliability of HPDA, in terms of MTTDL (Mean Time
To Data Loss), is better than that of either pure
HDD-based or SSD-based disk array. Our prototype
implementation of HPDA and the performance evaluations
show that HPDA significantly outperforms either
HDD-based or SSD-based disk array.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Klonatos:2012:TOS,
author = "Yannis Klonatos and Thanos Makatos and Manolis
Marazakis and Michail D. Flouris and Angelos Bilas",
title = "Transparent Online Storage Compression at the
Block-Level",
journal = j-TOS,
volume = "8",
number = "2",
pages = "5:1--5:??",
month = may,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2180905.2180906",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 6 18:17:34 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In this work, we examine how transparent block-level
compression in the I/O path can improve both the space
efficiency and performance of online storage. We
present ZBD, a block-layer driver that transparently
compresses and decompresses data as they flow between
the file-system and storage devices. Our system
provides support for variable-size blocks, metadata
caching, and persistence, as well as block allocation
and cleanup. ZBD targets maintaining high performance,
by mitigating compression and decompression overheads
that can have a significant impact on performance by
leveraging modern multicore CPUs through explicit work
scheduling. We present two case-studies for
compression. First, we examine how our approach can be
used to increase the capacity of SSD-based caches, thus
increasing their cost-effectiveness. Then, we examine
how ZBD can improve the efficiency of online disk-based
storage systems. We evaluate our approach in the Linux
kernel on a commodity server with multicore CPUs, using
PostMark, SPECsfs2008, TPC-C, and TPC-H. Preliminary
results show that transparent online block-level
compression is a viable option for improving effective
storage capacity, it can improve I/O performance up to
80\% by reducing I/O traffic and seek distance, and has
a negative impact on performance, up to 34\%, only when
single-thread I/O latency is critical. In particular,
for SSD-based caching, our results indicate that, in
line with current technology trends, compressed caching
trades off CPU utilization for performance and enhances
SSD efficiency as a storage cache up to 99\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Adams:2012:AWB,
author = "Ian F. Adams and Mark W. Storer and Ethan L. Miller",
title = "Analysis of Workload Behavior in Scientific and
Historical Long-Term Data Repositories",
journal = j-TOS,
volume = "8",
number = "2",
pages = "6:1--6:??",
month = may,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2180905.2180907",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 6 18:17:34 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The scope of archival systems is expanding beyond
cheap tertiary storage: scientific and medical data is
increasingly digital, and the public has a growing
desire to digitally record their personal histories.
Driven by the increase in cost efficiency of hard
drives, and the rise of the Internet, content archives
have become a means of providing the public with fast,
cheap access to long-term data. Unfortunately,
designers of purpose-built archival systems are either
forced to rely on workload behavior obtained from a
narrow, anachronistic view of archives as simply cheap
tertiary storage, or extrapolate from marginally
related enterprise workload data and traditional
library access patterns. To close this knowledge gap
and provide relevant input for the design of effective
long-term data storage systems, we studied the workload
behavior of several systems within this expanded
archival storage space. Our study examined several
scientific and historical archives, covering a mixture
of purposes, media types, and access models---that is,
public versus private. Our findings show that, for more
traditional private scientific archival storage, files
have become larger, but update rates have remained
largely unchanged. However, in the public content
archives we observed, we saw behavior that diverges
from the traditional ``write-once, read-maybe''
behavior of tertiary storage. Our study shows that the
majority of such data is modified---sometimes
unnecessarily---relatively frequently, and that
indexing services such as Google and internal data
management processes may routinely access large
portions of an archive, accounting for most of the
accesses. Based on these observations, we identify
areas for improving the efficiency and performance of
archival storage systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hsieh:2012:MDI,
author = "Jen-Wei Hsieh and Chung-Hsien Wu and Ge-Ming Chiu",
title = "{MFTL}: a Design and Implementation for {MLC} Flash
Memory Storage Systems",
journal = j-TOS,
volume = "8",
number = "2",
pages = "7:1--7:??",
month = may,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2180905.2180908",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 6 18:17:34 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "NAND flash memory has gained its popularity in a
variety of applications as a storage medium due to its
low power consumption, nonvolatility, high performance,
physical stability, and portability. In particular,
Multi-Level Cell (MLC) flash memory, which provides a
lower cost and higher density solution, has occupied
the largest part of NAND flash-memory market share.
However, MLC flash memory also introduces new
challenges: (1) Pages in a block must be written
sequentially. (2) Information to indicate a page being
obsoleted cannot be recorded in its spare area due to
the limitation on the number of partial programming.
Since most of applications access NAND flash memory
under FAT file system, this article designs an MLC
Flash Translation Layer (MFTL) for flash-memory storage
systems which takes constraints of MLC flash memory and
access behaviors of FAT file system into consideration.
A series of trace-driven simulations was conducted to
evaluate the performance of the proposed scheme.
Although MFTL is designed for MLC flash memory and FAT
file system, it is applicable to SLC flash memory and
other file systems as well. Our experiment results show
that the proposed MFTL could achieve a good performance
for various access patterns even on SLC flash memory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Tran:2012:ECB,
author = "Nguyen Tran and Frank Chiang and Jinyang Li",
title = "Efficient cooperative backup with decentralized trust
management",
journal = j-TOS,
volume = "8",
number = "3",
pages = "8:1--8:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2339118.2339119",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 6 18:17:35 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Existing backup systems are unsatisfactory: commercial
backup services are reliable but expensive while
peer-to-peer systems are cheap but offer limited
assurance of data reliability. This article introduces
Friendstore, a system that provides inexpensive and
reliable backup by giving users the choice to store
backup data only on nodes they trust (typically those
owned by friends and colleagues). Because it is built
on trusted nodes, Friendstore is not burdened by the
complexity required to cope with potentially malicious
participants. Friendstore only needs to detect and
repair accidental data loss and to ensure balanced
storage exchange. The disadvantage of using only
trusted nodes is that Friendstore cannot achieve
perfect storage utilization. Friendstore is designed
for a heterogeneous environment where nodes have very
different access link speeds and available disk spaces.
To ensure long-term data reliability, a node with
limited upload bandwidth refrains from storing more
data than its calculated maintainable capacity. A high
bandwidth node might be limited by its available disk
space. We introduce a simple coding scheme, called
XOR(1,2), which doubles a node's ability to store
backup information in the same amount of disk space at
the cost of doubling the amount of data transferred
during restore. Analysis and simulations using
long-term node activity traces show that a node can
reliably back up tens of gigabytes of data even with
low upload bandwidth.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Abd-El-Malek:2012:FSV,
author = "Michael Abd-El-Malek and Matthew Wachs and James Cipar
and Karan Sanghi and Gregory R. Ganger and Garth A.
Gibson and Michael K. Reiter",
title = "File system virtual appliances: {Portable} file system
implementations",
journal = j-TOS,
volume = "8",
number = "3",
pages = "9:1--9:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2339118.2339120",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 6 18:17:35 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "File system virtual appliances (FSVAs) address the
portability headaches that plague file system (FS)
developers. By packaging their FS implementation in a
virtual machine (VM), separate from the VM that runs
user applications, they can avoid the need to port the
file system to each operating system (OS) and OS
version. A small FS-agnostic proxy, maintained by the
core OS developers, connects the FSVA to whatever OS
the user chooses. This article describes an FSVA design
that maintains FS semantics for unmodified FS
implementations and provides desired OS and
virtualization features, such as a unified buffer cache
and VM migration. Evaluation of prototype FSVA
implementations in Linux and NetBSD, using Xen as the
virtual machine manager (VMM), demonstrates that the
FSVA architecture is efficient, FS-agnostic, and able
to insulate file system implementations from OS
differences that would otherwise require explicit
porting.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Luo:2012:GXC,
author = "Xianghong Luo and Jiwu Shu",
title = "Generalized {X-code}: an efficient {RAID-6} code for
arbitrary size of disk array",
journal = j-TOS,
volume = "8",
number = "3",
pages = "10:1--10:??",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2339118.2339121",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Nov 6 18:17:35 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Many RAID-6 codes have been proposed in the
literature, but each has its limitations. Horizontal
code has the ability to adapt to the arbitrary size of
a disk array but its high computational complexity is a
major shortcoming. In contrast, the computational
complexity of vertical code (e.g. X-code) often
achieves the theoretical optimality, but vertical code
is limited to using a prime number as the size of the
disk array. In this article, we propose a novel
efficient RAID-6 code for arbitrary size of disk array:
generalized X-code. We move the redundant elements
along their calculation diagonals in X-code onto two
specific disks and change two data elements into
redundant elements in order to realize our new code.
The generalized X-code achieves optimal encoding and
updating complexity and low decoding complexity; in
addition, it has the ability to adapt to arbitrary size
of disk array. Furthermore, we also provide a method
for generalizing horizontal code to achieve optimal
encoding and updating complexity while keeping the
code's original ability to adapt to arbitrary size of
disk array.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Long:2012:EN,
author = "Darrell Long",
title = "Editorial note",
journal = j-TOS,
volume = "8",
number = "4",
pages = "11:1--11:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2385603.2385604",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun May 5 09:02:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Bolosky:2012:ISI,
author = "Bill Bolosky and Jason Flinn",
title = "Introduction to the special issue {USENIX FAST 2012}",
journal = j-TOS,
volume = "8",
number = "4",
pages = "12:1--12:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2385603.2385605",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun May 5 09:02:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Shilane:2012:WOR,
author = "Philip Shilane and Mark Huang and Grant Wallace and
Windsor Hsu",
title = "{WAN}-optimized replication of backup datasets using
stream-informed delta compression",
journal = j-TOS,
volume = "8",
number = "4",
pages = "13:1--13:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2385603.2385606",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun May 5 09:02:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Replicating data off site is critical for disaster
recovery reasons, but the current approach of
transferring tapes is cumbersome and error prone.
Replicating across a wide area network (WAN) is a
promising alternative, but fast network connections are
expensive or impractical in many remote locations, so
improved compression is needed to make WAN replication
truly practical. We present a new technique for
replicating backup datasets across a WAN that not only
eliminates duplicate regions of files (deduplication)
but also compresses similar regions of files with delta
compression, which is available as a feature of EMC
Data Domain systems. Our main contribution is an
architecture that adds stream-informed delta
compression to already existing deduplication systems
and eliminates the need for new, persistent indexes.
Unlike techniques based on knowing a file's version or
that use a memory cache, our approach achieves delta
compression across all data replicated to a server at
any time in the past. From a detailed analysis of
datasets and statistics from hundreds of customers
using our product, we achieve an additional 2X
compression from delta compression beyond deduplication
and local compression, which enables customers to
replicate data that would otherwise fail to complete
within their backup window.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kim:2012:RSS,
author = "Hyojun Kim and Nitin Agrawal and Cristian Ungureanu",
title = "Revisiting storage for smartphones",
journal = j-TOS,
volume = "8",
number = "4",
pages = "14:1--14:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2385603.2385607",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun May 5 09:02:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Conventional wisdom holds that storage is not a big
contributor to application performance on mobile
devices. Flash storage (the type most commonly used
today) draws little power, and its performance is
thought to exceed that of the network subsystem. In
this article, we present evidence that storage
performance does indeed affect the performance of
several common applications such as Web browsing, maps,
application install, email, and Facebook. For several
Android smartphones, we find that just by varying the
underlying flash storage, performance over WiFi can
typically vary between 100\% and 300\% across
applications; in one extreme scenario, the variation
jumped to over 2000\%. With a faster network (set up
over USB), the performance variation rose even further.
We identify the reasons for the strong correlation
between storage and application performance to be a
combination of poor flash device performance, random
I/O from application databases, and heavy-handed use of
synchronous writes. Based on our findings, we implement
and evaluate a set of pilot solutions to address the
storage performance deficiencies in smartphones.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Fryer:2012:RVF,
author = "Daniel Fryer and Kuei Sun and Rahat Mahmood and
Tinghao Cheng and Shaun Benjamin and Ashvin Goel and
Angela Demke Brown",
title = "{Recon}: Verifying file system consistency at
runtime",
journal = j-TOS,
volume = "8",
number = "4",
pages = "15:1--15:??",
month = nov,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2385603.2385608",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun May 5 09:02:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "File system bugs that corrupt metadata on disk are
insidious. Existing reliability methods, such as
checksums, redundancy, or transactional updates, merely
ensure that the corruption is reliably preserved.
Typical workarounds, based on using backups or
repairing the file system, are painfully slow. Worse,
the recovery may result in further corruption. We
present Recon, a system that protects file system
metadata from buggy file system operations. Our
approach leverages file systems that provide crash
consistency using transactional updates. We define
declarative statements called consistency invariants
for a file system. These invariants must be satisfied
by each transaction being committed to disk to preserve
file system integrity. Recon checks these invariants at
commit, thereby minimizing the damage caused by buggy
file systems. The major challenges to this approach are
specifying invariants and interpreting file system
behavior correctly without relying on the file system
code. Recon provides a framework for file-system
specific metadata interpretation and invariant
checking. We show the feasibility of interpreting
metadata and writing consistency invariants for the
Linux ext3 file system using this framework. Recon can
detect random as well as targeted file-system
corruption at runtime as effectively as the offline
e2fsck file-system checker, with low overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{You:2013:USL,
author = "Gae-Won You and Seung-Won Hwang and Navendu Jain",
title = "{Ursa}: Scalable Load and Power Management in Cloud
Storage Systems",
journal = j-TOS,
volume = "9",
number = "1",
pages = "1:1--1:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435204.2435205",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun May 5 09:02:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Enterprise and cloud data centers are comprised of
tens of thousands of servers providing petabytes of
storage to a large number of users and applications. At
such a scale, these storage systems face two key
challenges: (1) hot-spots due to the dynamic popularity
of stored objects; and (2) high operational costs due
to power and cooling. Existing storage solutions,
however, are unsuitable to address these challenges
because of the large number of servers and data
objects. This article describes the design,
implementation, and evaluation of Ursa, a system that
scales to a large number of storage nodes and objects,
and aims to minimize latency and bandwidth costs during
system reconfiguration. Toward this goal, Ursa
formulates an optimization problem that selects a
subset of objects from hot-spot servers and performs
topology-aware migration to minimize reconfiguration
costs. As exact optimization is computationally
expensive, we devise scalable approximation techniques
for node selection and efficient divide-and-conquer
computation. We also show that the same dynamic
reconfiguration techniques can be leveraged to reduce
power costs by dynamically migrating data off
under-utilized nodes, and powering up servers
neighboring existing hot-spots to reduce
reconfiguration costs. Our evaluation shows that Ursa
achieves cost-effective load management, is
time-responsive in computing placement decisions (e.g.,
about two minutes for 10K nodes and 10M objects), and
provides power savings of 15\%--37\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hatzieleftheriou:2013:IBE,
author = "Andromachi Hatzieleftheriou and Stergios V.
Anastasiadis",
title = "Improving Bandwidth Efficiency for Consistent
Multistream Storage",
journal = j-TOS,
volume = "9",
number = "1",
pages = "2:1--2:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435204.2435206",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun May 5 09:02:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Synchronous small writes play a critical role in
system availability because they safely log recent
state modifications for fast recovery from crashes.
Demanding systems typically dedicate separate devices
to logging for adequate performance during normal
operation and redundancy during state reconstruction.
However, storage stacks enforce page-sized granularity
in data transfers from memory to disk. Thus, they
consume excessive storage bandwidth to handle small
writes, which hurts performance. The problem becomes
worse, as filesystems often handle multiple concurrent
streams, which effectively generate random I/O traffic.
In a journaled filesystem, we introduce wasteless
journaling as a mount mode that coalesces synchronous
concurrent small writes of data into full page-sized
journal blocks. Additionally, we propose selective
journaling to automatically activate wasteless
journaling on data writes with size below a fixed
threshold. We implemented a functional prototype of our
design over a widely-used filesystem. Our modes are
compared against existing methods using microbenchmarks
and application-level workloads on stand-alone servers
and a multitier networked system. We examine
synchronous and asynchronous writes. Coalescing small
data updates to the journal sequentially preserves
filesystem consistency while it reduces consumed
bandwidth up to several factors, decreases recovery
time up to 22\%, and lowers write latency up to orders
of magnitude.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Huang:2013:PCF,
author = "Cheng Huang and Minghua Chen and Jin Li",
title = "{Pyramid Codes}: Flexible Schemes to Trade Space for
Access Efficiency in Reliable Data Storage Systems",
journal = j-TOS,
volume = "9",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435204.2435207",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun May 5 09:02:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We design flexible schemes to explore the tradeoffs
between storage space and access efficiency in reliable
data storage systems. Aiming at this goal, two new
classes of erasure-resilient codes are introduced ---
Basic Pyramid Codes (BPC) and Generalized Pyramid Codes
(GPC). Both schemes require slightly more storage space
than conventional schemes, but significantly improve
the critical performance of read during failures and
unavailability. As a by-product, we establish a
necessary matching condition to characterize the limit
of failure recovery, that is, unless the matching
condition is satisfied, a failure case is impossible to
recover. In addition, we define a maximally recoverable
(MR) property. For all ERC schemes holding the MR
property, the matching condition becomes sufficient,
that is, all failure cases satisfying the matching
condition are indeed recoverable. We show that GPC is
the first class of non-MDS schemes holding the MR
property.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Huang:2013:ERD,
author = "Jianzhong Huang and Fenghao Zhang and Xiao Qin and
Changsheng Xie",
title = "Exploiting Redundancies and Deferred Writes to
Conserve Energy in Erasure-Coded Storage Clusters",
journal = j-TOS,
volume = "9",
number = "2",
pages = "4:1--4:??",
month = jul,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2491472.2491473",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:43 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We present a power-efficient scheme for erasure-coded
storage clusters --- ECS$^2$ --- which aims to offer
high energy efficiency with marginal reliability
degradation. ECS$^2$ utilizes data redundancies and
deferred writes to conserve energy. In ECS$^2$ parity
blocks are buffered exclusively in active data nodes
whereas parity nodes are placed into low-power mode. $
(k + r, k) $ RS-coded ECS$^2$ can achieve $ \lceil (r +
1) / 2 \rceil $-fault tolerance for $k$ active data
nodes and $r$-fault tolerance for all $ k + r $ nodes.
ECS$^2$ employs the following three optimizing
approaches to improve the energy efficiency of storage
clusters. (1) An adaptive threshold policy takes system
configurations and I/O workloads into account to
maximize standby time periods; (2) a selective
activation policy minimizes the number of
power-transitions in storage nodes; and (3) a
region-based buffer policy speeds up the
synchronization process by migrating parity blocks in a
batch method. After implementing an ECS$^2$-based
prototype in a Linux cluster, we evaluated its energy
efficiency and performance using four different types
of I/O workloads. The experimental results indicate
that compared to energy-oblivious erasure-coded
storage, ECS$^2$ can save the energy used by storage
clusters up to 29.8\% and 28.0\% in read-intensive and
write-dominated workloads when $ k = 6 $ and $ r = 3 $,
respectively. The results also show that ECS$^2$
accomplishes high power efficiency in both normal and
failed cases without noticeably affecting the I/O
performance of storage clusters.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Altiparmak:2013:GOR,
author = "Nihat Altiparmak and Ali Saman Tosun",
title = "Generalized Optimal Response Time Retrieval of
Replicated Data from Storage Arrays",
journal = j-TOS,
volume = "9",
number = "2",
pages = "5:1--5:??",
month = jul,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2491472.2491474",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:43 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Declustering techniques reduce query response times
through parallel I/O by distributing data among
parallel disks. Recently, replication-based approaches
were proposed to further reduce the response time.
Efficient retrieval of replicated data from multiple
disks is a challenging problem. Existing retrieval
techniques are designed for storage arrays with
identical disks, having no initial load or network
delay. In this article, we consider the generalized
retrieval problem of replicated data where the disks in
the system might be heterogeneous, the disks may have
initial load, and the storage arrays might be located
on different sites. We first formulate the generalized
retrieval problem using a Linear Programming (LP) model
and solve it with mixed integer programming techniques.
Next, the generalized retrieval problem is formulated
as a more efficient maximum flow problem. We prove that
the retrieval schedule returned by the maximum flow
technique yields the optimal response time and this
result matches the LP solution. We also propose a
low-complexity online algorithm for the generalized
retrieval problem by not guaranteeing the optimality of
the result. The performance of proposed and state-of-the-art
retrieval strategies is investigated using various
replication schemes, query types, query loads, disk
specifications, network delays, and initial loads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Sankar:2013:DSE,
author = "Sriram Sankar and Mark Shaw and Kushagra Vaid and
Sudhanva Gurumurthi",
title = "Datacenter Scale Evaluation of the Impact of
Temperature on Hard Disk Drive Failures",
journal = j-TOS,
volume = "9",
number = "2",
pages = "6:1--6:24",
month = jul,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2491472.2491475",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:43 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "With the advent of cloud computing and online
services, large enterprises rely heavily on their
datacenters to serve end users. A large datacenter
facility incurs increased maintenance costs in addition
to service unavailability when there are increased
failures. Among different server components, hard disk
drives are known to contribute significantly to server
failures; however, there is very little understanding
of the major determinants of disk failures in
datacenters. In this work, we focus on the
interrelationship between temperature, workload, and
hard disk drive failures in a large scale datacenter.
We present a dense storage case study from a population
housing thousands of servers and tens of thousands of
disk drives, hosting a large-scale online service at
Microsoft. We specifically establish correlation
between temperatures and failures observed at different
location granularities: (a) inside drive locations in a
server chassis, (b) across server locations in a rack,
and (c) across multiple racks in a datacenter. We show
that temperature exhibits a stronger correlation to
failures than the correlation of disk utilization with
drive failures. We establish that variations in
temperature are not significant in datacenters and have
little impact on failures. We also explore workload
impacts on temperature and disk failures and show that
the impact of workload is not significant. We then
experimentally evaluate knobs that control disk drive
temperature, including workload and chassis design
knobs. We corroborate our findings from the real data
study and show that workload knobs show minimal impact
on temperature. Chassis knobs like disk placement and
fan speeds have a larger impact on temperature.
Finally, we also show the proposed cost benefit of
temperature optimizations that increase hard disk drive
reliability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wu:2013:SFS,
author = "Xiaojian Wu and Sheng Qiu and A. L. Narasimha Reddy",
title = "{SCMFS}: a File System for Storage Class Memory and
its Extensions",
journal = j-TOS,
volume = "9",
number = "3",
pages = "7:1--7:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501620.2501621",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:47 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "Modern computer systems have been built around the
assumption that persistent storage is accessed via a
slow, block-based interface. However, emerging
nonvolatile memory technologies (sometimes referred to
as storage class memory (SCM)), are poised to
revolutionize storage systems. The SCM devices can be
attached directly to the memory bus and offer fast,
fine-grained access to persistent storage. In this
article, we propose a new file system --- SCMFS, which
is specially designed for Storage Class Memory. SCMFS
is implemented on the virtual address space and
utilizes the existing memory management module of the
operating system to help manage the file system space.
As a result, we largely simplified the file system
operations of SCMFS, which allowed us a better
exploration of performance gain from SCM. We have
implemented a prototype in Linux and evaluated its
performance through multiple benchmarks. The
experimental results show that SCMFS outperforms other
memory resident file systems, tmpfs, ramfs and ext2 on
ramdisk, and achieves about 70\% of memory bandwidth
for file read/write operations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Natanzon:2013:DSA,
author = "Assaf Natanzon and Eitan Bachmat",
title = "Dynamic Synchronous\slash Asynchronous Replication",
journal = j-TOS,
volume = "9",
number = "3",
pages = "8:1--8:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2508011",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:47 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Online, remote, data replication is critical for
today's enterprise IT organization. Availability of
data is key to the success of the organization. A few
hours of downtime can cost from thousands to millions
of dollars. With increasing frequency, companies are
instituting disaster recovery plans to ensure
appropriate data availability in the event of a
catastrophic failure or disaster that destroys a site
(e.g., flood, fire, or earthquake). Synchronous and
asynchronous replication technologies have been
available for a long period of time. Synchronous
replication has the advantage of no data loss, but due
to latency, synchronous replication is limited by
distance and bandwidth. Asynchronous replication on the
other hand has no distance limitation, but leads to
some data loss which is proportional to the data lag.
We present a novel method, implemented within EMC
Recover-Point, which allows the system to dynamically
move between these replication options without any
disruption to the I/O path. As latency grows, the
system will move from synchronous replication to
semi-synchronous replication and then to snapshot
shipping. It returns to synchronous replication as more
bandwidth is available and latency allows.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Rodeh:2013:BLB,
author = "Ohad Rodeh and Josef Bacik and Chris Mason",
title = "{BTRFS}: The {Linux} {B}-Tree Filesystem",
journal = j-TOS,
volume = "9",
number = "3",
pages = "9:1--9:32",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501620.2501623",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:47 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "BTRFS is a Linux filesystem that has been adopted as
the default filesystem in some popular versions of
Linux. It is based on copy-on-write, allowing for
efficient snapshots and clones. It uses B-trees as its
main on-disk data structure. The design goal is to work
well for many use cases and workloads. To this end,
much effort has been directed to maintaining even
performance as the filesystem ages, rather than trying
to support a particular narrow benchmark use-case.
Linux filesystems are installed on smartphones as well
as enterprise servers. This entails challenges on many
different fronts.\par
--- Scalability. The filesystem must scale in many
dimensions: disk space, memory, and CPUs.\par
--- Data integrity. Losing data is not an option, and
much effort is expended to safeguard the content. This
includes checksums, metadata duplication, and RAID
support built into the filesystem.\par
--- Disk diversity. The system should work well with
SSDs and hard disks. It is also expected to be able to
use an array of different sized disks, which poses
challenges to the RAID and striping
mechanisms.\par
This article describes the core ideas, data structures,
and algorithms of this filesystem. It sheds light on
the challenges posed by defragmentation in the presence
of snapshots, and the tradeoffs required to maintain
even performance in the face of a wide spectrum of
workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Jiang:2013:PSE,
author = "Song Jiang and Xiaoning Ding and Yuehai Xu and Kei
Davis",
title = "A Prefetching Scheme Exploiting both Data Layout and
Access History on Disk",
journal = j-TOS,
volume = "9",
number = "3",
pages = "10:1--10:??",
month = aug,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2508010",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:47 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Prefetching is an important technique for improving
effective hard disk performance. A prefetcher seeks to
accurately predict which data will be requested and
load it ahead of the arrival of the corresponding
requests. Current disk prefetch policies in major
operating systems track access patterns at the level of
file abstraction. While this is useful for exploiting
application-level access patterns, for two reasons
file-level prefetching cannot realize the full
performance improvements achievable by prefetching.
First, certain prefetch opportunities can only be
detected by knowing the data layout on disk, such as
the contiguous layout of file metadata or data from
multiple files. Second, nonsequential access of disk
data (requiring disk head movement) is much slower than
sequential access, and the performance penalty for
mis-prefetching a randomly located block, relative to
that of a sequential block, is correspondingly greater.
To overcome the inherent limitations of prefetching at
logical file level, we propose to perform prefetching
directly at the level of disk layout, and in a portable
way. Our technique, called DiskSeen, is intended to be
supplementary to, and to work synergistically with, any
present file-level prefetch policies. DiskSeen tracks
the locations and access times of disk blocks and,
based on analysis of their temporal and spatial
relationships, seeks to improve the sequentiality of
disk accesses and overall prefetching performance. It
also implements a mechanism to minimize
mis-prefetching, on a per-application basis, to
mitigate the corresponding performance penalty. Our
implementation of the DiskSeen scheme in the Linux 2.6
kernel shows that it can significantly improve the
effectiveness of prefetching, reducing execution times
by 20\%--60\% for microbenchmarks and real applications
such as grep, CVS, and TPC-H. Even for workloads
specifically designed to expose its weaknesses,
DiskSeen incurs only minor performance loss.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhang:2013:DEN,
author = "Guangyan Zhang and Weimin Zheng and Keqin Li",
title = "Design and Evaluation of a New Approach to {RAID-0}
Scaling",
journal = j-TOS,
volume = "9",
number = "4",
pages = "11:1--11:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2491054",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:51 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Scaling up a RAID-0 volume with added disks can
increase its storage capacity and I/O bandwidth
simultaneously. For preserving a round-robin data
distribution, existing scaling approaches require all
the data to be migrated. Such large data migration
results in a long redistribution time as well as a
negative impact on application performance. In this
article, we present a new approach to RAID-0 scaling
called FastScale. First, FastScale minimizes data
migration, while maintaining a uniform data
distribution. It moves only enough data blocks from old
disks to fill an appropriate fraction of new disks.
Second, FastScale optimizes data migration with access
aggregation and lazy checkpoint. Access aggregation
enables data migration to have a larger throughput due
to a decrement of disk seeks. Lazy checkpoint minimizes
the number of metadata writes without compromising data
consistency. Using several real system disk traces, we
evaluate the performance of FastScale through
comparison with SLAS, one of the most efficient
existing scaling approaches. The experiments show that
FastScale can reduce redistribution time by up to
86.06\% with smaller application I/O latencies. The
experiments also illustrate that the performance of
RAID-0 scaled using FastScale is almost identical to,
or even better than, that of the round-robin RAID-0.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Bessani:2013:DDS,
author = "Alysson Bessani and Miguel Correia and Bruno Quaresma
and Fernando Andr{\'e} and Paulo Sousa",
title = "{DepSky}: Dependable and Secure Storage in a
Cloud-of-Clouds",
journal = j-TOS,
volume = "9",
number = "4",
pages = "12:1--12:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2535929",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:51 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The increasing popularity of cloud storage services
has led companies that handle critical data to think
about using these services for their storage needs.
Medical record databases, large biomedical datasets,
historical information about power systems and
financial data are some examples of critical data that
could be moved to the cloud. However, the reliability
and security of data stored in the cloud still remain
major concerns. In this work we present DepSky, a
system that improves the availability, integrity, and
confidentiality of information stored in the cloud
through the encryption, encoding, and replication of
the data on diverse clouds that form a cloud-of-clouds.
We deployed our system using four commercial clouds and
used PlanetLab to run clients accessing the service
from different countries. We observed that our
protocols improved the perceived availability, and in
most cases, the access latency, when compared with
cloud providers individually. Moreover, the monetary
cost of using DepSky in this scenario is at most twice
the cost of using a single cloud, which is optimal and
seems to be a reasonable cost, given the benefits.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kwon:2013:HAF,
author = "Se Jin Kwon and Hyung-Ju Cho and Tae-Sun Chung",
title = "Hybrid Associative Flash Translation Layer for the
Performance Optimization of Chip-Level Parallel Flash
Memory",
journal = j-TOS,
volume = "9",
number = "4",
pages = "13:1--13:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2535931",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:51 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Flash memory is used widely in the data storage
market, particularly low-price MultiLevel Cell (MLC)
flash memory, which has been adopted by large-scale
storage systems despite its low performance. To
overcome the poor performance of MLC flash memory, a
system architecture has been designed to optimize
chip-level parallelism. This design increases the size
of the page unit and the block unit, thereby
simultaneously executing operations on multiple chips.
Unfortunately, its Flash Translation Layer (FTL)
generates many unused sectors in each page, which leads
to unnecessary write operations. Furthermore, it reuses
an earlier log block scheme, although it generates many
erase operations because of its low space utilization.
To solve these problems, we propose a hybrid
associative FTL (Hybrid-FTL) to enhance the performance
of the chip-level parallel flash memory system.
Hybrid-FTL reduces the number of write operations by
utilizing all of the unused sectors. Furthermore, it
reduces the overall number of erase operations by
classifying data as hot, cold, or fragment data.
Hybrid-FTL requires less mapping information in the
DRAM and in the flash memory compared with previous FTL
algorithms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Xie:2013:EHA,
author = "Yulai Xie and Kiran-Kumar Muniswamy-Reddy and Dan Feng
and Yan Li and Darrell D. E. Long",
title = "Evaluation of a Hybrid Approach for Efficient
Provenance Storage",
journal = j-TOS,
volume = "9",
number = "4",
pages = "14:1--14:??",
month = nov,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2501986",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Dec 12 18:12:51 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Provenance is the metadata that describes the history
of objects. Provenance provides new functionality in a
variety of areas, including experimental documentation,
debugging, search, and security. As a result, a number
of groups have built systems to capture provenance.
Most of these systems focus on provenance collection, a
few systems focus on building applications that use the
provenance, but all of these systems ignore an
important aspect: efficient long-term storage of
provenance. In this article, we first analyze the
provenance collected from multiple workloads and
characterize the properties of provenance with respect
to long-term storage. We then propose a hybrid scheme
that takes advantage of the graph structure of
provenance data and the inherent duplication in
provenance data. Our evaluation indicates that our
hybrid scheme, a combination of Web graph compression
(adapted for provenance) and dictionary encoding,
provides the best trade-off in terms of compression
ratio, compression time, and query performance when
compared to other compression schemes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lee:2014:UBC,
author = "Eunji Lee and Hyokyung Bahn and Sam H. Noh",
title = "A Unified Buffer Cache Architecture that Subsumes
Journaling Functionality via Nonvolatile Memory",
journal = j-TOS,
volume = "10",
number = "1",
pages = "1:1--1:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560010",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Feb 5 16:53:47 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "Journaling techniques are widely used in modern file
systems as they provide high reliability and fast
recovery from system failures. However, it reduces the
performance benefit of buffer caching as journaling
accounts for a bulk of the storage writes in real
system environments. To relieve this problem, we
present a novel buffer cache architecture that subsumes
the functionality of caching and journaling by making
use of nonvolatile memory such as PCM or STT-MRAM.
Specifically, our buffer cache supports what we call
the in-place commit scheme. This scheme avoids logging,
but still provides the same journaling effect by simply
altering the state of the cached block to frozen. As a
frozen block still provides the functionality of a
cache block, we show that in-place commit does not
degrade cache performance. We implement our scheme on
Linux 2.6.38 and measure the throughput and execution
time of the scheme with various file I/O benchmarks.
The results show that our scheme improves the
throughput and execution time by 89\% and 34\% on
average, respectively, compared to the existing Linux
buffer cache with ext4 without any loss of
reliability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Ma:2014:FFF,
author = "Ao Ma and Chris Dragga and Andrea C. Arpaci-Dusseau
and Remzi H. Arpaci-Dusseau and Marshall Kirk
McKusick",
title = "{Ffsck}: The Fast File-System Checker",
journal = j-TOS,
volume = "10",
number = "1",
pages = "2:1--2:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560011",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Feb 5 16:53:47 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "Failures, errors, and bugs can corrupt file systems
and cause data loss, despite the presence of journals
and similar preventive techniques. While consistency
checkers such as fsck can detect corruption and repair
a damaged image, they are generally created as an
afterthought, to be run only at rare intervals. Thus,
checkers operate slowly, causing significant downtime
for large scale storage systems. We address this
dilemma by treating the checker as a key component of
the overall file system, rather than a peripheral
add-on. To this end, we present a modified ext3 file
system, rext3, to directly support the fast
file-system checker, ffsck. Rext3 colocates and
self-identifies its metadata blocks, removing the need
for costly seeks and tree traversals during checking.
These modifications allow ffsck to scan and repair the
file system at rates approaching the full sequential
bandwidth of the underlying device. In addition, we
demonstrate that rext3 generally performs competitively
with ext3 and exceeds it in handling random reads and
large writes. Finally, we apply our principles to
FreeBSD's FFS file system and its checker, doing so in
a lightweight fashion that preserves the file-system
layout while still providing some of the performance
gains from ffsck.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lu:2014:SLF,
author = "Lanyue Lu and Andrea C. Arpaci-Dusseau and Remzi H.
Arpaci-Dusseau and Shan Lu",
title = "A Study of {Linux} File System Evolution",
journal = j-TOS,
volume = "10",
number = "1",
pages = "3:1--3:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560012",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Feb 5 16:53:47 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "We conduct a comprehensive study of file-system code
evolution. By analyzing eight years of Linux
file-system changes across 5079 patches, we derive
numerous new (and sometimes surprising) insights into
the file-system development process; our results should
be useful for both the development of file systems
themselves as well as the improvement of bug-finding
tools.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Plank:2014:SDS,
author = "James S. Plank and Mario Blaum",
title = "Sector-Disk {(SD)} Erasure Codes for Mixed Failure
Modes in {RAID} Systems",
journal = j-TOS,
volume = "10",
number = "1",
pages = "4:1--4:??",
month = jan,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2560013",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Feb 5 16:53:47 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Traditionally, when storage systems employ erasure
codes, they are designed to tolerate the failures of
entire disks. However, the most common types of
failures are latent sector failures, which only affect
individual disk sectors, and block failures which arise
through wear on SSD's. This article introduces SD
codes, which are designed to tolerate combinations of
disk and sector failures. As such, they consume far
less storage resources than traditional erasure codes.
We specify the codes with enough detail for the storage
practitioner to employ them, discuss their practical
properties, and detail an open-source implementation.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Choi:2014:THP,
author = "Jae Woo Choi and Dong In Shin and Young Jin Yu and
Hyeonsang Eom and Heon Young Yeom",
title = "Towards High-Performance {SAN} with Fast Storage
Devices",
journal = j-TOS,
volume = "10",
number = "2",
pages = "5:1--5:??",
month = mar,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2577385",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Apr 1 05:59:01 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Storage area network (SAN) is one of the most popular
solutions for constructing server environments these
days. In these kinds of server environments, HDD-based
storage usually becomes the bottleneck of the overall
system, but it is not enough to merely replace the
devices with faster ones in order to exploit their high
performance. In other words, proper optimizations are
needed to fully utilize their performance gains. In
this work, we first adopted a DRAM-based SSD as a fast
backend-storage in the existing SAN environment, and
found significant performance degradation compared to
its own capabilities, especially in the case of
small-sized random I/O pattern, even though a
high-speed network was used. We have proposed three
optimizations to solve this problem: (1) removing
software overhead in the SAN I/O path; (2) increasing
parallelism in the procedures for handling I/O
requests; and (3) adopting the temporal merge mechanism
to reduce network overheads. We have implemented them
as a prototype and found that our approaches make
substantial performance improvements by up to 39\% and
280\% in terms of both the latency and bandwidth,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Mao:2014:RPO,
author = "Bo Mao and Hong Jiang and Suzhen Wu and Yinjin Fu and
Lei Tian",
title = "Read-Performance Optimization for Deduplication-Based
Storage Systems in the Cloud",
journal = j-TOS,
volume = "10",
number = "2",
pages = "6:1--6:??",
month = mar,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2512348",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Apr 1 05:59:01 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Data deduplication has been demonstrated to be an
effective technique in reducing the total data
transferred over the network and the storage space in
cloud backup, archiving, and primary storage systems,
such as VM (virtual machine) platforms. However, the
performance of restore operations from a deduplicated
backup can be significantly lower than that without
deduplication. The main reason lies in the fact that a
file or block is split into multiple small data chunks
that are often located in different disks after
deduplication, which can cause a subsequent read
operation to invoke many disk IOs involving multiple
disks and thus degrade the read performance
significantly. While this problem has been by and large
ignored in the literature thus far, we argue that the
time is ripe for us to pay significant attention to it
in light of the emerging cloud storage applications and
the increasing popularity of the VM platform in the
cloud. This is because, in a cloud storage or VM
environment, a simple read request on the client side
may translate into a restore operation if the data to
be read or a VM suspended by the user was previously
deduplicated when written to the cloud or the VM
storage server, a likely scenario considering the
network bandwidth and storage capacity concerns in such
an environment. To address this problem, in this
article, we propose SAR, an SSD (solid-state
drive)-Assisted Read scheme, that effectively exploits
the high random-read performance properties of SSDs and
the unique data-sharing characteristic of
deduplication-based storage systems by storing in SSDs
the unique data chunks with high reference count, small
size, and nonsequential characteristics. In this way,
many read requests to HDDs are replaced by read
requests to SSDs, thus significantly improving the read
performance of the deduplication-based storage systems
in the cloud. The extensive trace-driven and VM restore
evaluations on the prototype implementation of SAR show
that SAR outperforms the traditional
deduplication-based and flash-based cache schemes
significantly, in terms of the average response
times.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Elerath:2014:BMC,
author = "Jon G. Elerath and Jiri Schindler",
title = "Beyond {MTTDL}: a Closed-Form {RAID 6} Reliability
Equation",
journal = j-TOS,
volume = "10",
number = "2",
pages = "7:1--7:??",
month = mar,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2577386",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Apr 1 05:59:01 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
note = "See rebuttal \cite{Iliadis:2015:RBM}.",
abstract = "We introduce a new closed-form equation for estimating
the number of data-loss events for a redundant array of
inexpensive disks in a RAID-6 configuration. The
equation expresses operational failures, their
restorations, latent (sector) defects, and disk media
scrubbing by time-based distributions that can
represent non-homogeneous Poisson processes. It uses
two-parameter Weibull distributions that allows the
distributions to take on many different shapes,
modeling increasing, decreasing, or constant occurrence
rates. This article focuses on the statistical basis of
the equation. It also presents time-based distributions
of the four processes based on an extensive analysis of
field data collected over several years from 10,000s of
commercially available systems with 100,000s of disk
drives. Our results for RAID-6 groups of size 16
indicate that the closed-form expression yields much
more accurate results compared to the MTTDL reliability
equation and matching computationally-intensive Monte
Carlo simulations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Desnoyers:2014:AMS,
author = "Peter Desnoyers",
title = "Analytic Models of {SSD} Write Performance",
journal = j-TOS,
volume = "10",
number = "2",
pages = "8:1--8:??",
month = mar,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2577384",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Apr 1 05:59:01 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Solid-state drives (SSDs) update data by writing a new
copy, rather than overwriting old data, causing prior
copies of the same data to be invalidated. These writes
are performed in units of pages, while space is
reclaimed in units of multipage erase blocks,
necessitating copying of any remaining valid pages in
the block before reclamation. The efficiency of this
cleaning process greatly affects performance under
random workloads; in particular, in SSDs, the write
bottleneck is typically internal media throughput, and
write amplification due to additional internal copying
directly reduces application throughput. We present the
first nearly-exact closed-form solution for write
amplification under greedy cleaning for
uniformly-distributed random traffic, validate its
accuracy via simulation, and show that its inaccuracies
are negligible for reasonable block sizes and
overprovisioning ratios. In addition, we also present
the first models which predict performance degradation
for both LRW (least-recently-written) cleaning and
greedy cleaning under simple nonuniform traffic
conditions; simulation results show the first model to
be exact and the second to be accurate within 2\%. We
extend the LRW model to arbitrary combinations of
random traffic and demonstrate its use in predicting
cleaning performance for real-world workloads. Using
these analytic models, we examine the strategy of
separating ``hot'' and ``cold'' data, showing that for
our traffic model, such separation eliminates any loss
in performance due to nonuniform traffic. We then show
how a system which segregates hot and cold data into
different block pools may shift free space between
these pools in order to achieve improved performance,
and how numeric methods may be used with our model to
find the optimum operating point, which approaches a
write amplification of 1.0 for increasingly skewed
traffic. We examine online methods for achieving this
optimal operating point and show a control strategy
based on our model which achieves high performance for
a number of real-world block traces.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Miranda:2014:RSE,
author = "Alberto Miranda and Sascha Effert and Yangwook Kang
and Ethan L. Miller and Ivan Popov and Andr{\'e} Brinkmann
and Tom Friedetzky and Toni Cortes",
title = "Random Slicing: Efficient and Scalable Data Placement
for Large-Scale Storage Systems",
journal = j-TOS,
volume = "10",
number = "3",
pages = "9:1--9:??",
month = jul,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2632230",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Aug 12 16:53:23 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The ever-growing amount of data requires highly
scalable storage solutions. The most flexible approach
is to use storage pools that can be expanded and scaled
down by adding or removing storage devices. To make
this approach usable, it is necessary to provide a
solution to locate data items in such a dynamic
environment. This article presents and evaluates the
Random Slicing strategy, which incorporates lessons
learned from table-based, rule-based, and
pseudo-randomized hashing strategies and is able to
provide a simple and efficient strategy that scales up
to handle exascale data. Random Slicing keeps a small
table with information about previous storage system
insert and remove operations, drastically reducing the
required amount of randomness while delivering a
perfect load distribution.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Saxena:2014:DPS,
author = "Mohit Saxena and Michael M. Swift",
title = "Design and Prototype of a Solid-State Cache",
journal = j-TOS,
volume = "10",
number = "3",
pages = "10:1--10:??",
month = jul,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629491",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Aug 12 16:53:23 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The availability of high-speed solid-state storage has
introduced a new tier into the storage hierarchy.
Low-latency and high-IOPS solid-state drives (SSDs)
cache data in front of high-capacity disks. However,
most existing SSDs are designed to be a drop-in disk
replacement, and hence are mismatched for use as a
cache. This article describes FlashTier, a system
architecture built upon a solid-state cache (SSC),
which is a flash device with an interface designed for
caching. Management software at the operating system
block layer directs caching. The FlashTier design
addresses three limitations of using traditional SSDs
for caching. First, FlashTier provides a unified
logical address space to reduce the cost of cache block
management within both the OS and the SSD. Second,
FlashTier provides a new SSC block interface to enable
a warm cache with consistent data after a crash.
Finally, FlashTier leverages cache behavior to silently
evict data blocks during garbage collection to improve
performance of the SSC. We first implement an SSC
simulator and a cache manager in Linux to perform an
in-depth evaluation and analysis of FlashTier's design
techniques. Next, we develop a prototype of SSC on the
OpenSSD Jasmine hardware platform to investigate the
benefits and practicality of FlashTier design. Our
prototyping experiences provide insights applicable to
managing modern flash hardware, implementing other SSD
prototypes and new OS storage stack interface
extensions. Overall, we find that FlashTier improves
cache performance by up to 168\% over consumer-grade
SSDs and up to 52\% over high-end SSDs. It also
improves flash lifetime for write-intensive workloads
by up to 60\% compared to SSD caches with a traditional
flash interface.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lee:2014:CSH,
author = "Eunji Lee and Hyokyung Bahn",
title = "Caching Strategies for High-Performance Storage
Media",
journal = j-TOS,
volume = "10",
number = "3",
pages = "11:1--11:??",
month = jul,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2633691",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Aug 12 16:53:23 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Due to the large access latency of hard disks during
data retrieval in computer systems, buffer caching
mechanisms have been studied extensively in database
and operating systems. By storing requested data into
the buffer cache, subsequent requests can be directly
serviced without accessing slow disk storage.
Meanwhile, high-speed storage media like PCM
(phase-change memory) have emerged recently, and one
may wonder if the traditional buffer cache will be
still effective for these high-speed storage media.
This article answers the question by showing that the
buffer cache is still effective in such environments
due to the software overhead and the bimodal data
access characteristics. Based on this observation, we
present a new buffer cache management scheme
appropriately designed for the system where the speed
gap between cache and storage is narrow. To this end,
we analyze the condition that caching will be effective
and find the characteristics of access patterns that
can be exploited in managing buffer cache for high
performance storage like PCM.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Sun:2014:LDL,
author = "Zhiwei Sun and Anthony Skjellum and Lee Ward and
Matthew L. Curry",
title = "A Lightweight Data Location Service for
Nondeterministic Exascale Storage Systems",
journal = j-TOS,
volume = "10",
number = "3",
pages = "12:1--12:??",
month = jul,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629451",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Aug 12 16:53:23 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In this article, we present LWDLS, a lightweight data
location service designed for Exascale storage systems
(storage systems with order of 10$^{18}$ bytes) and
geo-distributed storage systems (large storage systems
with physically distributed locations). LWDLS provides
a search-based data location solution, and enables free
data placement, movement, and replication. In LWDLS,
probe and prune protocols are introduced that reduce
topology mismatch, and a heuristic flooding search
algorithm (HFS) is presented that achieves higher
search efficiency than pure flooding search while
having comparable search speed and coverage to the pure
flooding search. LWDLS is lightweight and scalable in
terms of incorporating low overhead, high search
efficiency, no global state, and avoiding periodic
messages. LWDLS is fully distributed and can be used in
nondeterministic storage systems and in deterministic
storage systems to deal with cases where search is
needed. Extensive simulations modeling large-scale High
Performance Computing (HPC) storage environments
provide representative performance outcomes.
Performance is evaluated by metrics including search
scope, search efficiency, and average neighbor
distance. Results show that LWDLS is able to locate
data efficiently with low cost of state maintenance in
arbitrary network environments. Through these
simulations, we demonstrate the effectiveness of
protocols and search algorithm of LWDLS.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Schroeder:2014:ISI,
author = "Bianca Schroeder and Eno Thereska",
title = "Introduction to the Special Issue on {USENIX FAST
2014}",
journal = j-TOS,
volume = "10",
number = "4",
pages = "13:1--13:??",
month = oct,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2670792",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Oct 31 16:06:21 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2014:SCG,
author = "Mingqiang Li and Patrick P. C. Lee",
title = "{STAIR} Codes: a General Family of Erasure Codes for
Tolerating Device and Sector Failures",
journal = j-TOS,
volume = "10",
number = "4",
pages = "14:1--14:??",
month = oct,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2658991",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Oct 31 16:06:21 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Practical storage systems often adopt erasure codes to
tolerate device failures and sector failures, both of
which are prevalent in the field. However, traditional
erasure codes employ device-level redundancy to protect
against sector failures, and hence incur significant
space overhead. Recent sector-disk (SD) codes are
available only for limited configurations. By making a
relaxed but practical assumption, we construct a
general family of erasure codes called STAIR codes,
which efficiently and provably tolerate both device and
sector failures without any restriction on the size of
a storage array and the numbers of tolerable device
failures and sector failures. We propose the upstairs
encoding and downstairs encoding methods, which provide
complementary performance advantages for different
configurations. We conduct extensive experiments on
STAIR codes in terms of space saving, encoding/decoding
speed, and update cost. We demonstrate that STAIR codes
not only improve space efficiency over traditional
erasure codes, but also provide better computational
efficiency than SD codes based on our special code
construction. Finally, we present analytical models
that characterize the reliability of STAIR codes, and
show that the support of a wider range of
configurations by STAIR codes is critical for
tolerating sector failure bursts discovered in the
field.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kim:2014:EPC,
author = "Hyojun Kim and Sangeetha Seshadri and Clement L.
Dickey and Lawrence Chiu",
title = "Evaluating Phase Change Memory for Enterprise Storage
Systems: a Study of Caching and Tiering Approaches",
journal = j-TOS,
volume = "10",
number = "4",
pages = "15:1--15:??",
month = oct,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2668128",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Oct 31 16:06:21 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Storage systems based on Phase Change Memory (PCM)
devices are beginning to generate considerable
attention in both industry and academic communities.
But whether the technology in its current state will be
a commercially and technically viable alternative to
entrenched technologies such as flash-based SSDs
remains undecided. To address this, it is important to
consider PCM SSD devices not just from a device
standpoint, but also from a holistic perspective. This
article presents the results of our performance study
of a recent all-PCM SSD prototype. The average latency
for a 4KiB random read is 6.7 $ \mu $ s, which is about
$ 16 \times $ faster than a comparable eMLC flash SSD.
The distribution of I/O response times is also much
narrower than flash SSD for both reads and writes.
Based on the performance measurements and real-world
workload traces, we explore two typical storage use
cases: tiering and caching. We report that the IOPS/\$
of a tiered storage system can be improved by 12--66\%
and the aggregate elapsed time of a server-side caching
solution can be improved by up to 35\% by adding PCM.
Our results show that (even at current price points)
PCM storage devices show promising performance as a new
component in enterprise storage systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Xu:2014:APE,
author = "Lianghong Xu and James Cipar and Elie Krevat and
Alexey Tumanov and Nitin Gupta and Michael A. Kozuch
and Gregory R. Ganger",
title = "Agility and Performance in Elastic Distributed
Storage",
journal = j-TOS,
volume = "10",
number = "4",
pages = "16:1--16:??",
month = oct,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2668129",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Oct 31 16:06:21 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Elastic storage systems can be expanded or contracted
to meet current demand, allowing servers to be turned
off or used for other tasks. However, the usefulness of
an elastic distributed storage system is limited by its
agility: how quickly it can increase or decrease its
number of servers. Due to the large amount of data they
must migrate during elastic resizing, state of the art
designs usually have to make painful trade-offs among
performance, elasticity, and agility. This article
describes the state of the art in elastic storage and a
new system, called SpringFS, that can quickly change
its number of active servers, while retaining
elasticity and performance goals. SpringFS uses a novel
technique, termed bounded write offloading, that
restricts the set of servers where writes to overloaded
servers are redirected. This technique, combined with
the read offloading and passive migration policies used
in SpringFS, minimizes the work needed before
deactivation or activation of servers. Analysis of
real-world traces from Hadoop deployments at Facebook
and various Cloudera customers and experiments with the
SpringFS prototype confirm SpringFS's agility, show
that it reduces the amount of data migrated for elastic
resizing by up to two orders of magnitude, and show
that it cuts the percentage of active servers required
by 67--82\%, outdoing state-of-the-art designs by
6--120\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Fryer:2014:CIT,
author = "Daniel Fryer and Mike Qin and Jack Sun and Kah Wai Lee
and Angela Demke Brown and Ashvin Goel",
title = "Checking the Integrity of Transactional Mechanisms",
journal = j-TOS,
volume = "10",
number = "4",
pages = "17:1--17:??",
month = oct,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2675113",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Oct 31 16:06:21 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Data corruption is the most common consequence of
file-system bugs. When such corruption occurs, offline
check and recovery tools must be used, but they are
error prone and cause significant downtime. Previously
we showed that a runtime checker for the Ext3 file
system can verify that metadata updates are consistent,
helping detect corruption in metadata blocks at
transaction commit time. However, corruption can still
occur when a bug in the file system's transactional
mechanism loses, misdirects, or corrupts writes. We
show that a runtime checker must enforce the atomicity
and durability properties of the file system on every
write, in addition to checking transactions at commit
time, to provide the strong guarantee that every block
write will maintain file system consistency. We
identify the invariants that need to be enforced on
journaling and shadow paging file systems to preserve
the integrity of committed transactions. We also
describe the key properties that make it feasible to
check these invariants for a file system. Based on this
characterization, we have implemented runtime checkers
for Ext3 and Btrfs. Our evaluation shows that both
checkers detect data corruption effectively, and they
can be used during normal operation with low
overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "17",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Trifonov:2015:LCI,
author = "P. Trifonov",
title = "Low-Complexity Implementation of {RAID} Based on
{Reed--Solomon} Codes",
journal = j-TOS,
volume = "11",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700308",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Feb 24 18:13:03 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Fast algorithms are proposed for encoding and
reconstructing data in RAID based on Reed--Solomon
codes. The proposed approach is based on the cyclotomic
fast Fourier transform algorithm and enables one to
significantly reduce the number of expensive Galois
field multiplications required. The complexity of the
obtained algorithms is much lower than those for
existing MDS array codes. Software implementation of
the proposed algorithms is discussed. The performance
results show that the new algorithms provide
substantially better performance compared with the
standard algorithm.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2015:EHI,
author = "Yan-Kit Li and Min Xu and Chun-Ho Ng and Patrick P. C.
Lee",
title = "Efficient Hybrid Inline and Out-of-Line Deduplication
for Backup Storage",
journal = j-TOS,
volume = "11",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2641572",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Feb 24 18:13:03 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Backup storage systems often remove redundancy across
backups via inline deduplication, which works by
referring duplicate chunks of the latest backup to
those of existing backups. However, inline
deduplication degrades restore performance of the
latest backup due to fragmentation, and complicates
deletion of expired backups due to the sharing of data
chunks. While out-of-line deduplication addresses the
problems by forward-pointing existing duplicate chunks
to those of the latest backup, it introduces additional
I/Os of writing and removing duplicate chunks. We
design and implement RevDedup, an efficient hybrid
inline and out-of-line deduplication system for backup
storage. It applies coarse-grained inline deduplication
to remove duplicates of the latest backup, and then
fine-grained out-of-line reverse deduplication to
remove duplicates from older backups. Our reverse
deduplication design limits the I/O overhead and
prepares for efficient deletion of expired backups.
Through extensive testbed experiments using synthetic
and real-world datasets, we show that RevDedup can
bring high performance to the backup, restore, and
deletion operations, while maintaining high storage
efficiency comparable to conventional inline
deduplication.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hwang:2015:HHB,
author = "Taeho Hwang and Jaemin Jung and Youjip Won",
title = "{HEAPO}: Heap-Based Persistent Object Store",
journal = j-TOS,
volume = "11",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629619",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Feb 24 18:13:03 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In this work, we developed a Heap-Based Persistent
Object Store (HEAPO) to manage persistent objects in
byte-addressable Nonvolatile RAM (NVRAM). HEAPO defines
its own persistent heap layout, the persistent object
format, name space organization, object sharing and
protection mechanism, and undo-only log-based crash
recovery, all of which are effectively tailored for
NVRAM. We put our effort into developing a lightweight
and flexible layer to exploit the DRAM-like access
latency of NVRAM. To address this objective, we
developed (i) a native management layer for NVRAM to
eliminate redundancy between in-core and on-disk copies
of the metadata, (ii) an expandable object format,
(iii) a burst trie-based global name space with local
name space caching, (iv) static address binding, and
(v) minimal logging for undo-only crash recovery. We
implemented HEAPO at commodity OS (Linux 2.6.32) and
measured the performance. By eliminating metadata
redundancy, HEAPO improved the speed of creating,
attaching, and expanding an object by $ 1.3 \times $, $
4.5 \times $, and $ 3.8 \times $, respectively,
compared to memory-mapped file-based persistent object
store. Burst trie-based name space organization of
HEAPO yielded $ 7.6 \times $ better lookup performance
compared to hashed B-tree-based name space of EXT4. We
modified memcachedb to use HEAPO in maintaining its
search structure. For hash table update, HEAPO-based
memcachedb yielded $ 3.4 \times $ performance
improvement against original memcachedb implementation
which uses mmap() over ramdisk approach to maintain the
key-value store in memory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wei:2015:ZMZ,
author = "Qingsong Wei and Cheng Chen and Mingdi Xue and Jun
Yang",
title = "{Z-MAP}: a Zone-Based Flash Translation Layer with
Workload Classification for Solid-State Drive",
journal = j-TOS,
volume = "11",
number = "1",
pages = "4:1--4:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629663",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Feb 24 18:13:03 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Existing space management and address mapping schemes
for flash-based Solid-State-Drive (SSD) operate either
at page or block granularity, with inevitable
limitations in terms of memory requirement,
performance, garbage collection, and scalability. To
overcome these limitations, we proposed a novel space
management and address mapping scheme for flash
referred to as Z-MAP, which manages flash space at
granularity of Zone. Each Zone consists of multiple
numbers of flash blocks. Leveraging workload
classification, Z-MAP explores Page-mapping Zone (Page
Zone) to store random data and handle a large number of
partial updates, and Block-mapping Zone (Block Zone) to
store sequential data and lower the overall mapping
table. Zones are dynamically allocated and a mapping
scheme for a Zone is determined only when it is
allocated. Z-MAP uses a small part of Flash memory or
phase change memory as a streaming Buffer Zone to log
data sequentially and migrate data into Page Zone or
Block Zone based on workload classification. A
two-level address mapping is designed to reduce the
overall mapping table and address translation latency.
Z-MAP classifies data before it is permanently stored
into Flash memory so that different workloads can be
isolated and garbage collection overhead can be
minimized. Z-MAP has been extensively evaluated by
trace-driven simulation and a prototype implementation
on OpenSSD. Our benchmark results conclusively
demonstrate that Z-MAP can achieve up to 76\%
performance improvement, 81\% mapping table reduction,
and 88\% garbage collection overhead reduction compared
to existing Flash Translation Layer (FTL) schemes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Gim:2015:SSC,
author = "Jongmin Gim and Taeho Hwang and Youjip Won and Krishna
Kant",
title = "{SmartCon}: Smart Context Switching for Fast Storage
{IO} Devices",
journal = j-TOS,
volume = "11",
number = "2",
pages = "5:1--5:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2631922",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 24 17:41:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Handling of storage IO in modern operating systems
assumes that such devices are slow and CPU cycles are
valuable. Consequently, to effectively exploit the
underlying hardware resources, for example, CPU cycles,
storage bandwidth and the like, whenever an IO request
is issued to such device, the requesting thread is
switched out in favor of another thread that may be
ready to execute. Recent advances in nonvolatile
storage technologies and multicore CPUs make both of
these assumptions increasingly questionable, and an
unconditional context switch is no longer desirable. In
this article, we propose a novel mechanism called
SmartCon, which intelligently decides whether to
service a given IO request in interrupt-driven manner
or busy-wait--based manner based on not only the device
characteristics but also dynamic parameters such as IO
latency, CPU utilization, and IO size. We develop an
analytic performance model to project the performance
of SmartCon for forthcoming devices. We implement
SmartCon mechanism on Linux 2.6 and perform detailed
evaluation using three different IO devices: Ramdisk,
low-end SSD, and high-end SSD. We find that SmartCon
yields up to a 39\% performance gain over the
mainstream block device approach for Ramdisk, and up to
a 45\% gain for PCIe-based SSD and SATA-based SSDs. We
examine the detailed behavior of TLB, L1, L2 cache and
show that SmartCon achieves significant improvement in
all cache misbehaviors.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Rodeh:2015:VBI,
author = "Ohad Rodeh and Haim Helman and David Chambliss",
title = "Visualizing Block {IO} Workloads",
journal = j-TOS,
volume = "11",
number = "2",
pages = "6:1--6:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2651422",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 24 17:41:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Massive block IO systems are the workhorses powering
many of today's largest applications. Databases, health
care systems, and virtual machine images are examples
for block storage applications. The massive scale of
these workloads, and the complexity of the underlying
storage systems, makes it difficult to pinpoint
problems when they occur. This work attempts to shed
light on workload patterns through visualization,
aiding our intuition. We describe our experience in the
last 3 years of analyzing and visualizing customer
traces from XIV, an IBM enterprise block storage
system. We also present results from applying the same
visualization technology to Linux filesystems. We show
how visualization aids our understanding of workloads
and how it assists in resolving customer performance
problems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wu:2015:DSF,
author = "Chin-Hsien Wu and Kuo-Yi Huang",
title = "Data Sorting in Flash Memory",
journal = j-TOS,
volume = "11",
number = "2",
pages = "7:1--7:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2665067",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 24 17:41:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Because flash memory now provides an economical
solution for various portable devices and embedded
systems, an NAND flash-based storage system has
replaced the hard disk drive in many applications.
Recently, the implementation of database systems using
an NAND flash-based storage system has become an
important research topic. In particular, the external
sorting is an important operation in database systems.
With the very distinctive characteristics of flash
memory, the typical external sorting system that adopts
a clustered sorting process can result in performance
degradation and reduce the reliability of flash memory.
In this article, we will propose an unclustered sorting
method that considers the unique characteristics of
flash memory, and we then propose a decision rule to
exploit the advantages of both clustered and
unclustered sorting. The decision rule can separate
records according to their record length, sort them
appropriately by the clustered and unclustered sorting,
and merge the sorted results. The experimental results
show that the proposed method can improve performance
in an NAND flash-based storage system (i.e.,
solid-state drive).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Cho:2015:DTS,
author = "Seokhei Cho and Changhyun Park and Youjip Won and
Sooyong Kang and Jaehyuk Cha and Sungroh Yoon and
Jongmoo Choi",
title = "Design Tradeoffs of {SSDs}: From Energy Consumption's
Perspective",
journal = j-TOS,
volume = "11",
number = "2",
pages = "8:1--8:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2644818",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 24 17:41:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In this work, we studied the energy consumption
characteristics of various SSD design parameters. We
developed an accurate energy consumption model for SSDs
that computes aggregate, as well as component-specific,
energy consumption of SSDs in sub-msec time scale. In
our study, we used five different FTLs (page mapping,
DFTL, block mapping, and two different hybrid mappings)
and four different channel configurations (two, four,
eight, and 16 channels) under seven different workloads
(from large-scale enterprise systems to small-scale
desktop applications) in a combinatorial manner. For
each combination of the aforementioned parameters, we
examined the energy consumption for individual hardware
components of an SSD (microcontroller, DRAM, NAND
flash, and host interface). The following are some of
our findings. First, DFTL is the most energy-efficient
address-mapping scheme among the five FTLs we tested
due to its good write amplification and small DRAM
footprint. Second, a significant fraction of energy is
being consumed by idle flash chips waiting for the
completion of NAND operations in the other channels.
FTL should be designed to fully exploit the internal
parallelism so that energy consumption by idle chips is
minimized. Third, as a means to increase the internal
parallelism, increasing way parallelism (the number of
flash chips in a channel) is more effective than
increasing channel parallelism in terms of peak energy
consumption, performance, and hardware complexity.
Fourth, in designing high-performance and
energy-efficient SSDs, channel switching delay, way
switching delay, and page write latency need to be
incorporated in an integrated manner to determine the
optimal configuration of internal parallelism.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Iliadis:2015:RBM,
author = "Ilias Iliadis and Vinodh Venkatesan",
title = "Rebuttal to {``Beyond MTTDL: a Closed-Form RAID-6
Reliability Equation''}",
journal = j-TOS,
volume = "11",
number = "2",
pages = "9:1--9:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700311",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 24 17:41:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
note = "See \cite{Elerath:2014:BMC}.",
abstract = "A recent article on the reliability of RAID-6 storage
systems overlooks certain relevant prior work published
in the past 20 years and concludes that the widely used
mean time to data loss (MTTDL) metric does not provide
accurate results. In this note, we refute this position
by invoking uncited relevant prior work and
demonstrating that the MTTDL remains a useful metric.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chen:2015:EER,
author = "Tseng-Yi Chen and Hsin-Wen Wei and Tsung-Tai Yeh and
Tsan-Sheng Hsu and Wei-Kuan Shih",
title = "An Energy-Efficient and Reliable Storage Mechanism for
Data-Intensive Academic Archive Systems",
journal = j-TOS,
volume = "11",
number = "2",
pages = "10:1--10:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2720021",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Tue Mar 24 17:41:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Previous studies proposed energy-efficient solutions,
such as multispeed disks and disk spin-down methods, to
conserve power in their respective storage systems.
However, in most cases, the authors did not analyze the
reliability of their solutions. According to research
conducted by Google and the IDEMA standard, frequently
setting the disk status to standby mode will increase
the disk's Annual Failure Rate and reduce its lifespan.
To resolve the issue, we propose an evaluation function
called E$^3$SaRC (Economic Evaluation of Energy Saving
with Reliability Constraint), which considers the cost
of hardware failure when applying energy-saving
schemes. We also present an adaptive write cache
mechanism called CacheRAID. The mechanism tries to
mitigate the random access problems that implicitly
exist in RAID techniques and thereby reduce the energy
consumption of RAID disks. CacheRAID also addresses the
issue of system reliability by applying a control
mechanism to the spin-down algorithm. Our experimental
results show that the CacheRAID storage system can
reduce the power consumption of the conventional
software RAID 5 system by 65\% to 80\%. Moreover,
according to the E$^3$SaRC measurement, the overall
saved cost of CacheRAID is the largest among the
systems that we compared.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhang:2015:FFC,
author = "Ji Zhang and Xunfei Jiang and Xiao Qin and Wei-Shinn
Ku and Mohammed I. Alghamdi",
title = "{Frog}: a Framework for Context-Based File Systems",
journal = j-TOS,
volume = "11",
number = "3",
pages = "11:1--11:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2720022",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Aug 7 09:14:17 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "This article presents a framework, Frog, for
Context-Based File Systems (CBFSs) that aim at
simplifying the development of context-based file
systems and applications. Unlike existing
informed-based context-aware systems, Frog is a
unifying informed-based framework that abstracts
context-specific solutions as views, allowing
applications to make view selections according to
application behaviors. The framework can not only
eliminate overheads induced by traditional context
analysis, but also simplify the interactions between
the context-based file systems and applications. Rather
than propagating data through solution-specific
interfaces, views in Frog can be selected by inserting
their names in file path strings. With Frog in place,
programmers can migrate an application from one
solution to another by switching among views rather
than changing programming interfaces. Since the data
consistency issues are automatically enforced by the
framework, file-system developers can focus their
attention on context-specific solutions. We implement
two prototypes to demonstrate the strengths and
overheads of our design. Inspired by an observation
that there are more than 50\% of small files ({$<$4KB})
in a file system, we create a Bi-context Archiving
Virtual File System (BAVFS) that utilizes conservative
and aggressive prefetching for the contexts of random
and sequential reads. To improve the performance of
random read-and-write operations, the Bi-context Hybrid
Virtual File System (BHVFS) combines the
update-in-place and update-out-of-place solutions for
read-intensive and write-intensive contexts. Our
experimental results show that the benefits of
Frog-based CBFSs outweigh the overheads introduced by
integrating multiple context-specific solutions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wei:2015:AFS,
author = "Qingsong Wei and Jianxi Chen and Cheng Chen",
title = "Accelerating File System Metadata Access with
Byte-Addressable Nonvolatile Memory",
journal = j-TOS,
volume = "11",
number = "3",
pages = "12:1--12:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2766453",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Aug 7 09:14:17 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "File system performance is dominated by small and
frequent metadata access. Metadata is stored as blocks
on the hard disk drive. Partial metadata update results
in whole-block read or write, which significantly
amplifies disk I/O. Furthermore, a huge performance gap
between the CPU and disk aggravates this problem. In
this article, a file system metadata accelerator
(referred to as FSMAC) is proposed to optimize metadata
access by efficiently exploiting the persistency and
byte-addressability of Nonvolatile Memory (NVM). The
FSMAC decouples data and metadata access path, putting
data on disk and metadata in byte-addressable NVM at
runtime. Thus, data is accessed in a block from the I/O
bus and metadata is accessed in a byte-addressable
manner from the memory bus. Metadata access is
significantly accelerated and metadata I/O is
eliminated because metadata in NVM is no longer flushed
back to the disk periodically. A lightweight
consistency mechanism combining fine-grained versioning
and transaction is introduced in the FSMAC. The FSMAC
is implemented on a real NVDIMM platform and
intensively evaluated under different workloads.
Evaluation results show that the FSMAC accelerates the
file system up to 49.2 times for synchronized I/O and
7.22 times for asynchronized I/O. Moreover, it can
achieve significant performance speedup in network
storage and database environment, especially for
metadata-intensive or write-dominated workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2015:TOA,
author = "Zhichao Li and Ming Chen and Amanpreet Mukker and Erez
Zadok",
title = "On the Trade-Offs among Performance, Energy, and
Endurance in a Versatile Hybrid Drive",
journal = j-TOS,
volume = "11",
number = "3",
pages = "13:1--13:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700312",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Aug 7 09:14:17 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "There are trade-offs among performance, energy, and
device endurance for storage systems. Designs optimized
for one dimension or workload often suffer in another.
Therefore, it is important to study the trade-offs to
enable adaptation to workloads and dimensions. As Flash
SSD has emerged, hybrid drives have been studied more
closely. However, hybrids are mainly designed for high
throughput, efficient energy consumption, or improving
endurance---leaving quantitative study on the trade-offs
unexplored. Past endurance studies also lack a concrete
model to help study the trade-offs. Last, previous
designs are often based on inflexible policies that
cannot adapt easily to changing conditions. We designed
and developed GreenDM, a versatile hybrid drive that
combines Flash-based SSDs with traditional HDDs. The
SSD can be used as cache or as primary storage for hot
data. We present our endurance model together with
GreenDM to study these trade-offs. GreenDM presents a
block interface and requires no modifications to
existing software. GreenDM offers tunable parameters to
enable the system to adapt to many workloads. We have
designed, developed, and carefully evaluated GreenDM
with a variety of workloads using commodity SSD and HDD
drives. We demonstrate the importance of versatility to
enable adaptation to various workloads and
dimensions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Miao:2015:ISS,
author = "Youshan Miao and Wentao Han and Kaiwei Li and Ming Wu
and Fan Yang and Lidong Zhou and Vijayan Prabhakaran
and Enhong Chen and Wenguang Chen",
title = "{ImmortalGraph}: a System for Storage and Analysis of
Temporal Graphs",
journal = j-TOS,
volume = "11",
number = "3",
pages = "14:1--14:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700302",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Aug 7 09:14:17 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Temporal graphs that capture graph changes over time
are attracting increasing interest from research
communities, for functions such as understanding
temporal characteristics of social interactions on a
time-evolving social graph. ImmortalGraph is a storage
and execution engine designed and optimized
specifically for temporal graphs. Locality is at the
center of ImmortalGraph's design: temporal graphs are
carefully laid out in both persistent storage and
memory, taking into account data locality in both time
and graph-structure dimensions. ImmortalGraph
introduces the notion of locality-aware batch
scheduling in computation, so that common ``bulk''
operations on temporal graphs are scheduled to maximize
the benefit of in-memory data locality. The design of
ImmortalGraph explores an interesting interplay among
locality, parallelism, and incremental computation in
supporting common mining tasks on temporal graphs. The
result is a high-performance temporal-graph system that
is up to 5 times more efficient than existing database
solutions for graph queries. The locality optimizations
in ImmortalGraph offer up to an order of magnitude
speedup for temporal iterative graph mining compared to
a straightforward application of existing graph engines
on a series of snapshots.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Schindler:2015:ISI,
author = "Jiri Schindler and Erez Zadok",
title = "Introduction to the Special Issue on {USENIX FAST
2015}",
journal = j-TOS,
volume = "11",
number = "4",
pages = "15:1--15:??",
month = nov,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2825000",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Jan 25 07:23:46 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@article{Aghayev:2015:SWS,
  author = {Abutalib Aghayev and Mansour Shafaei and Peter
    Desnoyers},
  title = {{Skylight} --- a Window on Shingled Disk Operation},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2015,
  month = nov,
  volume = 11,
  number = 4,
  articleno = 16,
  pages = {16:1--16:??},
  doi = {https://doi.org/10.1145/2821511},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {We introduce Skylight, a novel methodology that
    combines software and hardware techniques to reverse
    engineer key properties of drive-managed Shingled
    Magnetic Recording (SMR) drives. The software part of
    Skylight measures the latency of controlled I/O
    operations to infer important properties of
    drive-managed SMR, including type, structure, and size
    of the persistent cache; type of cleaning algorithm;
    type of block mapping; and size of bands. The hardware
    part of Skylight tracks drive head movements during
    these tests, using a high-speed camera through an
    observation window drilled through the cover of the
    drive. These observations not only confirm inferences
    from measurements, but resolve ambiguities that arise
    from the use of latency measurements alone. We show the
    generality and efficacy of our techniques by running
    them on top of three emulated and two real SMR drives,
    discovering valuable performance-relevant details of
    the behavior of the real SMR drives.},
  bibdate = {Mon Jan 25 07:23:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Ma:2015:RCM,
  author = {Ao Ma and Rachel Traylor and Fred Douglis and Mark
    Chamness and Guanlin Lu and Darren Sawyer and Surendar
    Chandra and Windsor Hsu},
  title = {{RAIDShield}: Characterizing, Monitoring, and
    Proactively Protecting Against Disk Failures},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2015,
  month = nov,
  volume = 11,
  number = 4,
  articleno = 17,
  pages = {17:1--17:??},
  doi = {https://doi.org/10.1145/2820615},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {Modern storage systems orchestrate a group of disks to
    achieve their performance and reliability goals. Even
    though such systems are designed to withstand the
    failure of individual disks, failure of multiple disks
    poses a unique set of challenges. We empirically
    investigate disk failure data from a large number of
    production systems, specifically focusing on the impact
    of disk failures on RAID storage systems. Our data
    covers about one million SATA disks from six disk
    models for periods up to 5 years. We show how observed
    disk failures weaken the protection provided by RAID.
    The count of reallocated sectors correlates strongly
    with impending failures. With these findings we
    designed RAIDShield, which consists of two components.
    First, we have built and evaluated an active defense
    mechanism that monitors the health of each disk and
    replaces those that are predicted to fail imminently.
    This proactive protection has been incorporated into
    our product and is observed to eliminate 88\% of triple
    disk errors, which are 80\% of all RAID failures.
    Second, we have designed and simulated a method of
    using the joint failure probability to quantify and
    predict how likely a RAID group is to face multiple
    simultaneous disk failures, which can identify disks
    that collectively represent a risk of failure even when
    no individual disk is flagged in isolation. We find in
    simulation that RAID-level analysis can effectively
    identify most vulnerable RAID-6 systems, improving the
    coverage to 98\% of triple errors. We conclude with
    discussions of operational considerations in deploying
    RAIDShield more broadly and new directions in the
    analysis of disk errors. One interesting approach is to
    combine multiple metrics, allowing the values of
    different indicators to be used for predictions. Using
    newer field data that reports an additional metric,
    medium errors, we find that the relative efficacy of
    reallocated sectors and medium errors varies across
    disk models, offering an additional way to predict
    failures.},
  bibdate = {Mon Jan 25 07:23:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Jannen:2015:BWO,
  author = {William Jannen and Jun Yuan and Yang Zhan and Amogh
    Akshintala and John Esmet and Yizheng Jiao and Ankur
    Mittal and Prashant Pandey and Phaneendra Reddy and
    Leif Walsh and Michael A. Bender and Martin
    Farach-Colton and Rob Johnson and Bradley C. Kuszmaul
    and Donald E. Porter},
  title = {{BetrFS}: Write-Optimization in a Kernel File System},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2015,
  month = nov,
  volume = 11,
  number = 4,
  articleno = 18,
  pages = {18:1--18:??},
  doi = {https://doi.org/10.1145/2798729},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {The B$^\epsilon $-tree File System, or BetrFS
    (pronounced ``better eff ess''), is the first in-kernel
    file system to use a write-optimized data structure
    (WODS). WODS are promising building blocks for storage
    systems because they support both microwrites and large
    scans efficiently. Previous WODS-based file systems
    have shown promise but have been hampered in several
    ways, which BetrFS mitigates or eliminates altogether.
    For example, previous WODS-based file systems were
    implemented in user space using FUSE, which
    superimposes many reads on a write-intensive workload,
    reducing the effectiveness of the WODS. This article
    also contributes several techniques for exploiting
    write-optimization within existing kernel
    infrastructure. BetrFS dramatically improves
    performance of certain types of large scans, such as
    recursive directory traversals, as well as performance
    of arbitrary microdata operations, such as file
    creates, metadata updates, and small writes to files.
    BetrFS can make small, random updates within a large
    file 2 orders of magnitude faster than other local file
    systems. BetrFS is an ongoing prototype effort and
    requires additional data-structure tuning to match
    current general-purpose file systems on some
    operations, including deletes, directory renames, and
    large sequential writes. Nonetheless, many applications
    realize significant performance improvements on BetrFS.
    For instance, an in-place rsync of the Linux kernel
    source sees roughly 1.6--22 $ \times $ speedup over
    commodity file systems.},
  bibdate = {Mon Jan 25 07:23:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{DeCapitaniDiVimercati:2015:SIE,
  author = {Sabrina {De Capitani Di Vimercati} and Sara Foresti
    and Stefano Paraboschi and Gerardo Pelosi and
    Pierangela Samarati},
  title = {Shuffle Index: Efficient and Private Access to
    Outsourced Data},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2015,
  month = nov,
  volume = 11,
  number = 4,
  articleno = 19,
  pages = {19:1--19:??},
  doi = {https://doi.org/10.1145/2747878},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {Data outsourcing and cloud computing have been
    emerging at an ever-growing rate as successful
    approaches for allowing users and companies to rely on
    external services for storing and managing data. As
    data and access to them are not under the control of
    the data owner, there is a clear need to provide proper
    confidentiality protection. Such requirements concern
    the confidentiality not only of the stored data
    (content) but also of the specific accesses (or
    patterns of them) that users make on such data. In this
    article, we address these issues and propose an
    approach for guaranteeing content, access, and pattern
    confidentiality in a data outsourcing scenario. The
    proposed solution is based on the definition of a
    shuffle index structure, which adapts traditional
    B+-trees and, by applying a combination of techniques
    (covers, caches, and shuffling), ensures
    confidentiality of the data and of queries over them,
    protecting each single access as well as sequences
    thereof. The proposed solution also supports update
    operations over the data, while making reads and writes
    not recognizable as such by the server. We show that
    the shuffle index exhibits a limited performance cost,
    thus resulting effectively usable in practice.},
  bibdate = {Mon Jan 25 07:23:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Desnoyers:2016:ISI,
  author = {Peter Desnoyers and James Hughes},
  title = {Introduction to the Special Issue on {MSST 2015}},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 1,
  articleno = 1,
  pages = {1:1--1:??},
  doi = {https://doi.org/10.1145/2853993},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  bibdate = {Mon Feb 29 06:03:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Jones:2016:CDR,
  author = {Stephanie N. Jones and Ahmed Amer and Ethan L. Miller
    and Darrell D. E. Long and Rekha Pitchumani and
    Christina R. Strong},
  title = {Classifying Data to Reduce Long-Term Data Movement in
    Shingled Write Disks},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 1,
  articleno = 2,
  pages = {2:1--2:??},
  doi = {https://doi.org/10.1145/2851505},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {Shingled magnetic recording (SMR) is a means of
    increasing the density of hard drives that brings a new
    set of challenges. Due to the nature of SMR disks,
    updating in place is not an option. Holes left by
    invalidated data can only be filled if the entire band
    is reclaimed, and a poor band compaction algorithm
    could result in spending a lot of time moving blocks
    over the lifetime of the device. We propose using write
    frequency to separate blocks to reduce data movement
    and develop a band compaction algorithm that implements
    this heuristic. We demonstrate how our algorithm
    results in improved data management, resulting in an up
    to 45\% reduction in required data movements when
    compared to naive approaches to band management.},
  bibdate = {Mon Feb 29 06:03:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Lu:2016:BPE,
  author = {Youyou Lu and Jiwu Shu and Long Sun},
  title = {Blurred Persistence: Efficient Transactions in
    Persistent Memory},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 1,
  articleno = 3,
  pages = {3:1--3:??},
  doi = {https://doi.org/10.1145/2851504},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {Persistent memory provides data durability in main
    memory and enables memory-level storage systems. To
    ensure consistency of such storage systems, memory
    writes need to be transactional and are carefully moved
    across the boundary between the volatile CPU cache and
    the persistent main memory. Unfortunately, cache
    management in the CPU cache is hardware-controlled.
    Legacy transaction mechanisms, which are designed for
    disk-based storage systems, are inefficient in ordered
    data persistence of transactions in persistent memory.
    In this article, we propose the Blurred Persistence
    mechanism to reduce the transaction overhead of
    persistent memory by blurring the
    volatility-persistence boundary. Blurred Persistence
    consists of two techniques. First, Execution in Log
    executes a transaction in the log to eliminate
    duplicated data copies for execution. It allows
    persistence of the volatile uncommitted data, which are
    detectable with reorganized log structure. Second,
    Volatile Checkpoint with Bulk Persistence allows the
    committed data to aggressively stay volatile by
    leveraging the data durability in the log, as long as
    the commit order across threads is kept. By doing so,
    it reduces the frequency of forced persistence and
    improves cache efficiency. Evaluations show that our
    mechanism improves system performance by 56.3\% to
    143.7\% for a variety of workloads.},
  bibdate = {Mon Feb 29 06:03:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Dragga:2016:GGC,
  author = {Chris Dragga and Douglas J. Santry},
  title = {{GCTrees}: Garbage Collecting Snapshots},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 1,
  articleno = 4,
  pages = {4:1--4:??},
  doi = {https://doi.org/10.1145/2857056},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {File-system snapshots have been a key component of
    enterprise storage management since their inception.
    Creating and managing them efficiently, while
    maintaining flexibility and low overhead, has been a
    constant struggle. Although the current
    state-of-the-art mechanism---hierarchical reference
    counting---performs reasonably well for traditional
    small-file workloads, these workloads are increasingly
    vanishing from the enterprise data center, replaced
    instead with virtual machine and database workloads.
    These workloads center around a few very large files,
    violating the assumptions that allow hierarchical
    reference counting to operate efficiently. To better
    cope with these workloads, we introduce Generational
    Chain Trees (GCTrees), a novel method of space
    management that uses concepts of block lineage across
    snapshots rather than explicit reference counting. As a
    proof of concept, we create a prototype file
    system---gcext4, a modified version of ext4 that uses
    GCTrees as a basis for snapshots and copy-on-write. In
    evaluating this prototype empirically, we find that
    although they have a somewhat higher overhead for
    traditional workloads, GCTrees have dramatically lower
    overhead than hierarchical reference counting for
    large-file workloads, improving by a factor of 34 or
    more in some cases. Furthermore, gcext4 performs
    comparably to ext4 across all workloads, showing that
    GCTrees impose minor cost for their benefits.},
  bibdate = {Mon Feb 29 06:03:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Grawinkel:2016:LRM,
  author = {Matthias Grawinkel and Lars Nagel and Andr{\'e}
    Brinkmann},
  title = {{LoneStar RAID}: Massive Array of Offline Disks for
    Archival Systems},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 1,
  articleno = 5,
  pages = {5:1--5:??},
  doi = {https://doi.org/10.1145/2840810},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {The need for huge storage archives rises with the ever
    growing creation of data. With today's big data and
    data analytics applications, some of these huge
    archives become active in the sense that all stored
    data can be accessed at any time. Running and evolving
    these archives is a constant tradeoff between
    performance, capacity, and price. We present the
    LoneStar RAID, a disk-based storage architecture, which
    focuses on high reliability, low energy consumption,
    and cheap reads. It is designed for MAID systems with
    up to hundreds of disk drives per server and is
    optimized for ``write once, read sometimes'' workloads.
    We use dedicated data and parity disks, and export the
    data disks as individually accessible buckets. By
    intertwining disk groups into a two-dimensional RAID
    and improving single-disk reliability with intradisk
    redundancy, the system achieves an elastic fault
    tolerance that can at least recover all 3-disk
    failures. Furthermore, we integrate a cache to offload
    parity updates and a journal to track the RAID's state.
    The LoneStar RAID scheme provides a mean time to data
    loss (MTTDL) that competes with today's erasure codes
    and is optimized to require only a minimal set of
    running disk drives.},
  bibdate = {Mon Feb 29 06:03:46 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Jung:2016:NHF,
  author = {Myoungsoo Jung and Wonil Choi and Shuwen Gao and Ellis
    Herbert {Wilson III} and David Donofrio and John Shalf
    and Mahmut Taylan Kandemir},
  title = {{NANDFlashSim}: High-Fidelity, Microarchitecture-Aware
    {NAND} Flash Memory Simulation},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 2,
  articleno = 6,
  pages = {6:1--6:??},
  doi = {https://doi.org/10.1145/2700310},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {As the popularity of NAND flash expands in arenas from
    embedded systems to high-performance computing, a
    high-fidelity understanding of its specific properties
    becomes increasingly important. Further, with the
    increasing trend toward multiple-die, multiple-plane
    architectures and high-speed interfaces, flash memory
    systems are expected to continue to scale and cheapen,
    resulting in their broader proliferation. However, when
    designing NAND-based devices, making decisions about
    the optimal system configuration is nontrivial, because
    flash is sensitive to a number of parameters and
    suffers from inherent latency variations, and no
    available tools suffice for studying these nuances. The
    parameters include the architectures, such as multidie
    and multiplane, diverse node technologies, bit
    densities, and cell reliabilities. Therefore, we
    introduce NANDFlashSim, a high-fidelity,
    latency-variation-aware, and highly configurable
    NAND-flash simulator, which implements a detailed
    timing model for 16 state-of-the-art NAND operations.
    Using NANDFlashSim, we notably discover the following.
    First, regardless of the operation, reads fail to
    leverage internal parallelism. Second, MLC provides
    lower I/O bus contention than SLC, but contention
    becomes a serious problem as the number of dies
    increases. Third, many-die architectures outperform
    many-plane architectures for disk-friendly workloads.
    Finally, employing a high-performance I/O bus or an
    increased page size does not enhance energy savings.
    Our simulator is available at http://nfs.camelab.org.},
  bibdate = {Wed Jun 8 16:03:39 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Wildani:2016:CWG,
  author = {Avani Wildani and Ethan L. Miller},
  title = {Can We Group Storage? {Statistical} Techniques to
    Identify Predictive Groupings in Storage System
    Accesses},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 2,
  articleno = 7,
  pages = {7:1--7:??},
  doi = {https://doi.org/10.1145/2738042},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {Storing large amounts of data for different users has
    become the new normal in a modern distributed cloud
    storage environment. Storing data successfully requires
    a balance of availability, reliability, cost, and
    performance. Typically, systems design for this balance
    with minimal information about the data that will pass
    through them. We propose a series of methods to derive
    groupings from data that have predictive value,
    informing layout decisions for data on disk. Unlike
    previous grouping work, we focus on dynamically
    identifying groupings in data that can be gathered from
    active systems in real time with minimal impact using
    spatiotemporal locality. We outline several techniques
    we have developed and discuss how we select particular
    techniques for particular workloads and application
    domains. Our statistical and machine-learning-based
    grouping algorithms answer questions such as ``What can
    a grouping be based on?'' and ``Is a given grouping
    meaningful for a given application?'' We design our
    models to be flexible and require minimal domain
    information so that our results are as broadly
    applicable as possible. We intend for this work to
    provide a launchpad for future specialized system
    design using groupings in combination with caching
    policies and architectural distinctions such as tiered
    storage to create the next generation of scalable
    storage systems.},
  bibdate = {Wed Jun 8 16:03:39 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Huang:2016:IFB,
  author = {Sai Huang and Qingsong Wei and Dan Feng and Jianxi
    Chen and Cheng Chen},
  title = {Improving Flash-Based Disk Cache with Lazy Adaptive
    Replacement},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 2,
  articleno = 8,
  pages = {8:1--8:??},
  doi = {https://doi.org/10.1145/2737832},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {For years, the increasing popularity of flash memory
    has been changing storage systems. Flash-based
    solid-state drives (SSDs) are widely used as a new
    cache tier on top of hard disk drives (HDDs) to speed
    up data-intensive applications. However, the endurance
    problem of flash memory remains a concern and is
    getting worse with the adoption of MLC and TLC flash.
    In this article, we propose a novel cache management
    algorithm for flash-based disk cache named Lazy
    Adaptive Replacement Cache (LARC). LARC adopts the idea
    of selective caching to filter out seldom accessed
    blocks and prevent them from entering cache. This
    avoids cache pollution and preserves popular blocks in
    cache for a longer period of time, leading to a higher
    hit rate. Meanwhile, by avoiding unnecessary cache
    replacements, LARC reduces the volume of data written
    to the SSD and yields an SSD-friendly access pattern.
    In this way, LARC improves the performance and
    endurance of the SSD at the same time. LARC is
    self-tuning and incurs little overhead. It has been
    extensively evaluated by both trace-driven simulations
    and synthetic benchmarks on a prototype implementation.
    Our experiments show that LARC outperforms state-of-art
    algorithms for different kinds of workloads and extends
    SSD lifetime by up to 15.7 times.},
  bibdate = {Wed Jun 8 16:03:39 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Zhang:2016:EDP,
  author = {Yihua Zhang and Marina Blanton},
  title = {Efficient Dynamic Provable Possession of Remote Data
    via Update Trees},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = feb,
  volume = 12,
  number = 2,
  articleno = 9,
  pages = {9:1--9:??},
  doi = {https://doi.org/10.1145/2747877},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {The emergence and wide availability of remote storage
    service providers prompted work in the security
    community that allows clients to verify integrity and
    availability of the data that they outsourced to a not
    fully trusted remote storage server at a relatively low
    cost. Most recent solutions to this problem allow
    clients to read and update (i.e., insert, modify, or
    delete) stored data blocks while trying to lower the
    overhead associated with verifying the integrity of the
    stored data. In this work, we develop a novel scheme,
    performance of which favorably compares with the
    existing solutions. Our solution additionally enjoys a
    number of new features, such as a natural support for
    operations on ranges of blocks, revision control, and
    support for multiple user access to shared content. The
    performance guarantees that we achieve stem from a
    novel data structure called a balanced update tree and
    removing the need for interaction during update
    operations in addition to communicating the updates
    themselves.},
  bibdate = {Wed Jun 8 16:03:39 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@Article{Wang:2016:SIW,
author = "Wei Wang and Tao Xie and Abhinav Sharma",
title = "{SWANS}: an Interdisk Wear-Leveling Strategy for
{RAID-0} Structured {SSD} Arrays",
journal = j-TOS,
volume = "12",
number = "3",
pages = "10:1--10:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2756555",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:06 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "NAND flash memory-based solid state disks (SSDs) have
been widely used in enterprise servers. However, flash
memory has limited write endurance, as a block becomes
unreliable after a finite number of program/erase
cycles. Existing wear-leveling techniques are
essentially intradisk data distribution schemes, as
they can only even wear out across the flash medium
within a single SSD. When multiple SSDs are organized
in an array manner in server applications, an interdisk
wear-leveling technique, which can ensure a uniform
wear-out distribution across SSDs, is much needed. In
this article, we propose a novel SSD-array level
wear-leveling strategy called SWANS (Smoothing Wear
Across N SSDs) for an SSD array structured in a RAID-0
format, which is frequently used in server
applications. SWANS dynamically monitors and balances
write distributions across SSDs in an intelligent way.
Further, to evaluate its effectiveness, we build an SSD
array simulator on top of a validated single SSD
simulator. Next, SWANS is implemented in its array
controller. Comprehensive experiments with real-world
traces show that SWANS decreases the standard deviation
of writes across SSDs on average by 16.7x. The gap in
the total bytes written between the most written SSD
and the least written SSD in an 8-SSD array shrinks at
least 1.3x.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@article{Moon:2016:DRI,
  author = {Sangwhan Moon and A. L. Narasimha Reddy},
  title = {Does {RAID} Improve Lifetime of {SSD} Arrays?},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = jun,
  volume = 12,
  number = 3,
  articleno = 11,
  pages = {11:1--11:??},
  doi = {https://doi.org/10.1145/2764915},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {Parity protection at the system level is typically
    employed to compose reliable storage systems. However,
    careful consideration is required when SSD-based
    systems employ parity protection. First, additional
    writes are required for parity updates. Second, parity
    consumes space on the device, which results in write
    amplification from less efficient garbage collection at
    higher space utilization. This article analyzes the
    effectiveness of SSD-based RAID and discusses the
    potential benefits and drawbacks in terms of
    reliability. A Markov model is presented to estimate
    the lifetime of SSD-based RAID systems in different
    environments. In a small array, our results show that
    parity protection provides benefit only with
    considerably low space utilizations and low data access
    rates. However, in a large system, RAID improves data
    lifetime even when we take write amplification into
    account.},
  bibdate = {Sat Mar 25 07:00:06 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib},
  acknowledgement = ack-nhfb,
}
@article{Kang:2016:MPV,
  author = {Junbin Kang and Chunming Hu and Tianyu Wo and Ye Zhai
    and Benlong Zhang and Jinpeng Huai},
  title = {{MultiLanes}: Providing Virtualized Storage for
    {OS}-Level Virtualization on Manycores},
  journal = j-TOS,
  fjournal = {ACM Transactions on Storage},
  ajournal = {ACM Trans. Storage},
  year = 2016,
  month = jun,
  volume = 12,
  number = 3,
  articleno = 12,
  pages = {12:1--12:??},
  doi = {https://doi.org/10.1145/2801155},
  issn = {1553-3077 (print), 1553-3093 (electronic)},
  issn-l = {1553-3077},
  coden = {????},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J960},
  abstract = {OS-level virtualization is often used for server
    consolidation in data centers because of its high
    efficiency. However, the sharing of storage stack
    services among the colocated containers incurs
    contention on shared kernel data structures and locks
    within I/O stack, leading to severe performance
    degradation on manycore platforms incorporating fast
    storage technologies (e.g., SSDs based on nonvolatile
    memories). This article presents MultiLanes, a
    virtualized storage system for OS-level virtualization
    on manycores. MultiLanes builds an isolated I/O stack
    on top of a virtualized storage device for each
    container to eliminate contention on kernel data
    structures and locks between them, thus scaling them to
    manycores. Meanwhile, we propose a set of techniques to
    tune the overhead induced by storage-device
    virtualization to be negligible, and to scale the
    virtualized devices to manycores on the host, which
    itself scales poorly. To reduce the contention within
    each single container, we further propose SFS, which
    runs multiple file-system instances through the
    proposed virtualized storage devices, distributes all
    files under each directory among the underlying
    file-system instances, then stacks a unified namespace
    on top of them. The evaluation of our prototype system
    built for Linux container (LXC) on a 32-core machine
    with both a RAM disk and a modern flash-based SSD
    demonstrates that MultiLanes scales much better than
    Linux in micro- and macro-benchmarks, bringing
    significant performance improvements, and that
    MultiLanes with SFS can further reduce the contention
    within each single container.},
  bibdate = {Sat Mar 25 07:00:06 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/tos/;
    https://www.math.utah.edu/pub/tex/bib/tos.bib;
    https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  acknowledgement = ack-nhfb,
}
@Article{Chen:2016:IPF,
author = "Feng Chen and Binbing Hou and Rubao Lee",
title = "Internal Parallelism of Flash Memory-Based Solid-State
Drives",
journal = j-TOS,
volume = "12",
number = "3",
pages = "13:1--13:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2818376",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:06 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "A unique merit of a solid-state drive (SSD) is its
internal parallelism. In this article, we present a set
of comprehensive studies on understanding and
exploiting internal parallelism of SSDs. Through
extensive experiments and thorough analysis, we show
that exploiting internal parallelism of SSDs can not
only substantially improve input/output (I/O)
performance but also may lead to some surprising side
effects and dynamics. For example, we find that with
parallel I/Os, SSD performance is no longer highly
sensitive to access patterns (random or sequential),
but rather to other factors, such as data access
interferences and physical data layout. Many of our
prior understandings about SSDs also need to be
reconsidered. For example, we find that with parallel
I/Os, write performance could outperform reads and is
largely independent of access patterns, which is
opposite to our long-existing common understanding
about slow random writes on SSDs. We have also observed
a strong interference between concurrent reads and
writes as well as the impact of physical data layout to
parallel I/O performance. Based on these findings, we
present a set of case studies in database management
systems, a typical data-intensive application. Our case
studies show that exploiting internal parallelism is
not only the key to enhancing application performance,
and more importantly, it also fundamentally changes the
equation for optimizing applications. This calls for a
careful reconsideration of various aspects in
application and system designs. Furthermore, we give a
set of experimental studies on new-generation SSDs and
the interaction between internal and external
parallelism in an SSD-based Redundant Array of
Independent Disks (RAID) storage. With these critical
findings, we finally make a set of recommendations to
system architects and application designers for
effectively exploiting internal parallelism.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Basak:2016:SWI,
author = "Jayanta Basak and Kushal Wadhwani and Kaladhar
Voruganti",
title = "Storage Workload Identification",
journal = j-TOS,
volume = "12",
number = "3",
pages = "14:1--14:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2818716",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:06 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Storage workload identification is the task of
characterizing a workload in a storage system (more
specifically, network storage system --- NAS or SAN) and
matching it with the previously known workloads. We
refer to storage workload identification as ``workload
identification'' in the rest of this article. Workload
identification is an important problem for cloud
providers to solve because (1) providers can leverage
this information to colocate similar workloads to make
the system more predictable and (2) providers can
identify workloads and subsequently give guidance to
the subscribers as to associated best practices (with
respect to configuration) for provisioning those
workloads. Historically, people have identified
workloads by looking at their read/write ratios,
random/sequential ratios, block size, and interarrival
frequency. Researchers are well aware that workload
characteristics change over time and that one cannot
just take a point in time view of a workload, as that
will incorrectly characterize workload behavior.
Increasingly, manual detection of workload signature is
becoming harder because (1) it is difficult for a human
to detect a pattern and (2) representing a workload
signature by a tuple consisting of average values for
each of the signature components leads to a large
error. In this article, we present workload signature
detection and a matching algorithm that is able to
correctly identify workload signatures and match them
with other similar workload signatures. We have tested
our algorithm on nine different workloads generated
using publicly available traces and on real customer
workloads running in the field to show the robustness
of our approach.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lee:2016:EST,
author = "Sungjin Lee and Dongkun Shin and Youngjin Kim and
Jihong Kim",
title = "Exploiting Sequential and Temporal Localities to
Improve Performance of {NAND} Flash-Based {SSDs}",
journal = j-TOS,
volume = "12",
number = "3",
pages = "15:1--15:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2905054",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:06 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "NAND flash-based Solid-State Drives (SSDs) are
becoming a viable alternative as a secondary storage
solution for many computing systems. Since the physical
characteristics of NAND flash memory are different from
conventional Hard-Disk Drives (HDDs), flash-based SSDs
usually employ an intermediate software layer, called a
Flash Translation Layer (FTL). The FTL runs several
firmware algorithms for logical-to-physical mapping,
I/O interleaving, garbage collection, wear-leveling,
and so on. These FTL algorithms not only have a great
effect on storage performance and lifetime, but also
determine hardware cost and data integrity. In general,
a hybrid FTL scheme has been widely used in mobile
devices because it exhibits high performance and high
data integrity at a low hardware cost. Recently, a
demand-based FTL based on page-level mapping has been
rapidly adopted in high-performance SSDs. The
demand-based FTL more effectively exploits the
device-level parallelism than the hybrid FTL and
requires a small amount of memory by keeping only
popular mapping entries in DRAM. Because of this
caching mechanism, however, the demand-based FTL is not
robust enough for power failures and requires extra
reads to fetch missing mapping entries from NAND flash.
In this article, we propose a new flash translation
layer called LAST++. The proposed LAST++ scheme is
based on the hybrid FTL, thus it has the inherent
benefits of the hybrid FTL, including low resource
requirements, strong robustness for power failures, and
high read performance. By effectively exploiting the
locality of I/O references, LAST++ increases
device-level parallelism and reduces garbage collection
overheads. This leads to a great improvement of I/O
performance and makes it possible to overcome the
limitations of the hybrid FTL. Our experimental results
show that LAST++ outperforms the demand-based FTL by
27\% for writes and 7\% for reads, on average, while
offering higher robustness against sudden power
failures. LAST++ also improves write performance by
39\%, on average, over the existing hybrid FTL.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wan:2016:HSF,
author = "Jiguang Wan and Peng Xu and Xubin He and Jibin Wang
and Junyao Li and Changsheng Xie",
title = "{H-Scale}: a Fast Approach to Scale Disk Arrays via
Hybrid Stripe Deployment",
journal = j-TOS,
volume = "12",
number = "3",
pages = "16:1--16:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2822895",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:06 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "To satisfy the explosive growth of data in large-scale
data centers, where redundant arrays of independent
disks (RAIDs), especially RAID-5, are widely deployed,
effective storage scaling and disk expansion methods
are desired. However, a way to reduce the data
migration overhead and maintain the reliability of the
original RAID are major concerns of storage scaling. To
address these problems, we propose a new RAID scaling
scheme, H-Scale, to achieve fast RAID scaling via
hybrid stripe layouts. H-Scale takes advantage of the
loose restriction of stripe structures to choose
migrated data and to create hybrid stripe structures.
The main advantages of our scheme include: (1)
dramatically reducing the data migration overhead and
thus speeding up the scaling process, (2) maintaining
the original RAID's reliability, (3) balancing the
workload among disks after scaling, and (4) providing a
general scaling approach for different RAID levels. Our
theoretical analysis show that H-Scale outperforms
existing scaling solutions in terms of data migration,
I/O overheads, and parity update operations. Evaluation
results on a prototype implementation demonstrate that
H-Scale speeds up the online scaling process by up to
60\% under SPC traces, and similar improvements on
scaling time and user response time are also achieved
by evaluations using standard benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Basak:2016:UFL,
author = "Jayanta Basak and P. C. Nagesh",
title = "A User-Friendly Log Viewer for Storage Systems",
journal = j-TOS,
volume = "12",
number = "3",
pages = "17:1--17:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2846101",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:06 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "System log files contains messages emitted from
several modules within a system and carries valuable
information about the system state such as device
status and error conditions and also about the various
tasks within the system such as program names,
execution path, including function names and
parameters, and the task completion status. For
customers with remote support, the system collects and
transmits these logs to a central enterprise
repository, where these are monitored for alerts,
problem forecasting, and troubleshooting. Very large
log files limit the interpretability for the support
engineers. For an expert, a large volume of log
messages may not pose any problem; however, an
inexperienced person may get flummoxed due to the
presence of a large number of log messages. Often it is
desired to present the log messages in a comprehensive
manner where a person can view the important messages
first and then go into details if required. In this
article, we present a user-friendly log viewer where we
first hide the unimportant or inconsequential messages
from the log file. A user can then click a particular
hidden view and get the details of the hidden messages.
Messages with low utility are considered
inconsequential as their removal does not impact the
end user for the aforesaid purpose such as problem
forecasting or troubleshooting. We relate the utility
of a message to the probability of its appearance in
the due context. We present machine-learning-based
techniques that computes the usefulness of individual
messages in a log file. We demonstrate identification
and discarding of inconsequential messages to shrink
the log size to acceptable limits. We have tested this
over real-world logs and observed that eliminating such
low value data can reduce the log files significantly
(30\% to 55\%), with minimal error rates (7\% to 20\%).
When limited user feedback is available, we show
modifications to the technique to learn the user intent
and accordingly further reduce the error.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "17",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Diesburg:2016:TLA,
author = "Sarah Diesburg and Christopher Meyers and Mark
Stanovich and An-I Andy Wang and Geoff Kuenning",
title = "{TrueErase}: Leveraging an Auxiliary Data Path for
Per-File Secure Deletion",
journal = j-TOS,
volume = "12",
number = "4",
pages = "18:1--18:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2854882",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "One important aspect of privacy is the ability to
securely delete sensitive data from electronic storage
in such a way that it cannot be recovered; we call this
action secure deletion. Short of physically destroying
the entire storage medium, existing software
secure-deletion solutions tend to be piecemeal at best
--- they may only work for one type of storage or file
system, may force the user to delete all files instead
of selected ones, may require the added complexities of
encryption and key storage, may require extensive
changes and additions to the computer's operating
system or storage firmware, and may not handle system
crashes gracefully. We present TrueErase, a holistic
secure-deletion framework for individual systems that
contain sensitive data. Through design, implementation,
verification, and evaluation on both a hard drive and
NAND flash, TrueErase shows that it is possible to
construct a per-file, secure-deletion framework that
can accommodate different storage media and legacy file
systems, require limited changes to legacy systems, and
handle common crash scenarios. TrueErase can serve as a
building block by cryptographic systems that securely
delete information by erasing encryption keys. The
overhead is dependent on spatial locality, number of
sensitive files, and workload (computational- or
I/O-bound).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "18",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Song:2016:EMM,
author = "Nae Young Song and Yongseok Son and Hyuck Han and Heon
Young Yeom",
title = "Efficient Memory-Mapped {I/O} on Fast Storage Device",
journal = j-TOS,
volume = "12",
number = "4",
pages = "19:1--19:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2846100",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In modern operating systems, memory-mapped I/O (mmio)
is an important access method that maps a file or
file-like resource to a region of memory. The mapping
allows applications to access data from files through
memory semantics (i.e., load/store) and it provides
ease of programming. The number of applications that
use mmio are increasing because memory semantics can
provide better performance than file semantics (i.e.,
read/write). As more data are located in the main
memory, the performance of applications can be enhanced
owing to the effect of a large cache. When mmio is
used, hot data tend to reside in the main memory and
cold data are located in storage devices such as HDD
and SSD; data placement in the memory hierarchy depends
on the virtual memory subsystem of the operating
system. Generally, the performance of storage devices
has a direct impact on the performance of mmio. It is
widely expected that better storage devices will lead
to better performance. However, the expectation is
limited when fast storage devices are used since the
virtual memory subsystem does not reflect the
performance feature of those devices. In this article,
we examine the Linux virtual memory subsystem and mmio
path to determine the influence of fast storage on the
existing Linux kernel. Throughout our investigation, we
find that the overhead of the Linux virtual memory
subsystem, negligible on the HDD, prevents applications
from using the full performance of fast storage
devices. To reduce the overheads and fully exploit the
fast storage devices, we present several optimization
techniques. We modify the Linux kernel to implement our
optimization techniques and evaluate our prototyped
system with low-latency storage devices. Experimental
results show that our optimized mmio has up to 7x
better performance than the original mmio. We also
compare our system to a system that has enough memory
to keep all data in the main memory. The system with
insufficient memory and our mmio achieves 92\%
performance of the resource-rich system. This result
implies that our virtual memory subsystem for mmap can
effectively extend the main memory with fast storage
devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "19",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Paulo:2016:EDD,
author = "Jo{\~a}o Paulo and Jos{\'e} Pereira",
title = "Efficient Deduplication in a Distributed Primary
Storage Infrastructure",
journal = j-TOS,
volume = "12",
number = "4",
pages = "20:1--20:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2876509",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "A large amount of duplicate data typically exists
across volumes of virtual machines in cloud computing
infrastructures. Deduplication allows reclaiming these
duplicates while improving the cost-effectiveness of
large-scale multitenant infrastructures. However,
traditional archival and backup deduplication systems
impose prohibitive storage overhead for virtual
machines hosting latency-sensitive applications.
Primary deduplication systems reduce such penalty but
rely on special cluster filesystems, centralized
components, or restrictive workload assumptions. Also,
some of these systems reduce storage overhead by
confining deduplication to off-peak periods that may be
scarce in a cloud environment. We present DEDIS, a
dependable and fully decentralized system that performs
cluster-wide off-line deduplication of virtual
machines' primary volumes. DEDIS works on top of any
unsophisticated storage backend, centralized or
distributed, as long as it exports a basic shared block
device interface. Also, DEDIS does not rely on data
locality assumptions and incorporates novel
optimizations for reducing deduplication overhead and
increasing its reliability. The evaluation of an
open-source prototype shows that minimal I/O overhead
is achievable even when deduplication and intensive
storage I/O are executed simultaneously. Also, our
design scales out and allows collocating DEDIS
components and virtual machines in the same servers,
thus, sparing the need of additional hardware.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "20",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yang:2016:WSZ,
author = "Yue Yang and Jianwen Zhu",
title = "Write Skew and {Zipf} Distribution: Evidence and
Implications",
journal = j-TOS,
volume = "12",
number = "4",
pages = "21:1--21:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2908557",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/benfords-law.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Understanding workload characteristics is essential to
storage systems design and performance optimization.
With the emergence of flash memory as a new viable
storage medium, the new design concern of flash
endurance arises, necessitating a revisit of workload
characteristics, in particular, of the write behavior.
Inspired by Web caching studies where a Zipf-like
access pattern is commonly found, we hypothesize that
write count distribution at the block level may also
follow Zipf's Law. To validate this hypothesis, we
study 48 block I/O traces collected from a wide variety
of real and benchmark applications. Through extensive
analysis, we demonstrate that the Zipf-like pattern
indeed widely exists in write traffic provided its
disguises are removed by statistical processing. This
finding implies that write skew in a large class of
applications could be analytically expressed and, thus,
facilitates design tradeoff explorations adaptive to
workload characteristics.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "21",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wu:2016:LLD,
author = "Suzhen Wu and Bo Mao and Xiaolan Chen and Hong Jiang",
title = "{LDM}: Log Disk Mirroring with Improved Performance
and Reliability for {SSD}-Based Disk Arrays",
journal = j-TOS,
volume = "12",
number = "4",
pages = "22:1--22:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2892639",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "With the explosive growth in data volume, the I/O
bottleneck has become an increasingly daunting
challenge for big data analytics. Economic forces,
driven by the desire to introduce flash-based
Solid-State Drives (SSDs) into the high-end storage
market, have resulted in hybrid storage systems in the
cloud. However, a single flash-based SSD cannot satisfy
the performance, reliability, and capacity requirements
of enterprise or HPC storage systems in the cloud.
While an array of SSDs organized in a RAID structure,
such as RAID5, provides the potential for high storage
capacity and bandwidth, reliability and performance
problems will likely result from the parity update
operations. In this article, we propose a Log Disk
Mirroring scheme (LDM) to improve the performance and
reliability of SSD-based disk arrays. LDM is a hybrid
disk array architecture that consists of several SSDs
and two hard disk drives (HDDs). In an LDM array, the
two HDDs are mirrored as a write buffer that temporally
absorbs the small write requests. The small and random
write data are written on the mirroring buffer by using
the logging technique that sequentially appends new
data. The small write data are merged and destaged to
the SSD-based disk array during the system idle
periods. Our prototype implementation of the LDM array
and the performance evaluations show that the LDM array
significantly outperforms the pure SSD-based disk
arrays by a factor of 20.4 on average, and outperforms
HPDA by a factor of 5.0 on average. The reliability
analysis shows that the MTTDL of the LDM array is 2.7
times and 1.7 times better than that of pure SSD-based
disk arrays and HPDA disk arrays.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "22",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Esiner:2016:FFB,
author = "Ertem Esiner and Adilet Kachkeev and Samuel Braunfeld
and Alptekin K{\"u}p{\c{c}}{\"u} and {\"O}znur
{\"O}zkasap",
title = "{FlexDPDP}: Flexlist-Based Optimized Dynamic Provable
Data Possession",
journal = j-TOS,
volume = "12",
number = "4",
pages = "23:1--23:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2943783",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "With increasing popularity of cloud storage,
efficiently proving the integrity of data stored on an
untrusted server has become significant. Authenticated
skip lists and rank-based authenticated skip lists
(RBASL) have been used to provide support for provable
data update operations in cloud storage. However, in a
dynamic file scenario, an RBASL based on block indices
falls short when updates are not proportional to a
fixed block size; such an update to the file, even if
small, may result in $ O(n) $ updates on the data
structure for a file with $n$ blocks. To overcome this
problem, we introduce FlexList, a flexible length-based
authenticated skip list. FlexList translates
variable-size updates to $ O(\lceil u / B \rceil) $
insertions, removals, or modifications, where $u$ is the
size of the update and $B$ is the (average) block size.
We further present various optimizations on the four
types of skip lists (regular, authenticated, rank-based
authenticated, and FlexList). We build such a structure
in $ O(n) $ time and parallelize this operation for the
first time. We compute one single proof to answer
multiple (non)membership queries and obtain efficiency
gains of 35\%, 35\%, and 40\% in terms of proof time,
energy, and size, respectively. We propose a method of
handling multiple updates at once, achieving efficiency
gains of up to 60\% at the server side and 90\% at the
client side. We also deployed our implementation of
FlexDPDP (dynamic provable data possession (DPDP) with
FlexList instead of RBASL) on PlanetLab, demonstrating
that FlexDPDP performs comparable to the most efficient
static storage scheme (provable data possession (PDP))
while providing dynamic data support.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "23",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hall:2016:TPR,
author = "Robert J. Hall",
title = "Tools for Predicting the Reliability of Large-Scale
Storage Systems",
journal = j-TOS,
volume = "12",
number = "4",
pages = "24:1--24:??",
month = aug,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2911987",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Data-intensive applications require extreme scaling of
their underlying storage systems. Such scaling,
together with the fact that storage systems must be
implemented in actual data centers, increases the risk
of data loss from failures of underlying components.
Accurate engineering requires quantitatively predicting
reliability, but this remains challenging due to the
need to account for extreme scale, redundancy scheme
type and strength, distribution architecture, and
component dependencies. This article introduces
CQSim-R, a tool suite for predicting the reliability of
large-scale storage system designs and deployments.
CQSim-R includes (a) direct calculations based on an
only-drives-fail failure model and (b) an event-based
simulator for detailed prediction that handles failures
of and failure dependencies among arbitrary (drive or
nondrive) components. These are based on a common
combinatorial framework for modeling placement
strategies. The article demonstrates CQSim-R using
models of common storage systems, including replicated
and erasure coded designs. New results, such as the
poor reliability scaling of spread-placed systems and a
quantification of the impact of data center
distribution and rack-awareness on reliability,
demonstrate the usefulness and generality of the tools.
Analysis and empirical studies show the tools'
soundness, performance, and scalability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "24",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Brown:2017:ISI,
author = "Angela Demke Brown and Florentina Popovici",
title = "Introduction to the Special Issue on {USENIX FAST
2016}",
journal = j-TOS,
volume = "13",
number = "1",
pages = "1:1--1:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3039209",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Stefanovici:2017:TSS,
author = "Ioan Stefanovici and Bianca Schroeder and Greg O'Shea
and Eno Thereska",
title = "Treating the Storage Stack Like a Network",
journal = j-TOS,
volume = "13",
number = "1",
pages = "2:1--2:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3032968",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In a data center, an IO from an application to
distributed storage traverses not only the network but
also several software stages with diverse
functionality. This set of ordered stages is known as
the storage or IO stack. Stages include caches,
hypervisors, IO schedulers, file systems, and device
drivers. Indeed, in a typical data center, the number
of these stages is often larger than the number of
network hops to the destination. Yet, while packet
routing is fundamental to networks, no notion of IO
routing exists on the storage stack. The path of an IO
to an endpoint is predetermined and hard coded. This
forces IO with different needs (e.g., requiring
different caching or replica selection) to flow through
a one-size-fits-all IO stack structure, resulting in an
ossified IO stack. This article proposes sRoute, an
architecture that provides a routing abstraction for
the storage stack. sRoute comprises a centralized
control plane and ``sSwitches'' on the data plane. The
control plane sets the forwarding rules in each sSwitch
to route IO requests at runtime based on
application-specific policies. A key strength of our
architecture is that it works with unmodified
applications and Virtual Machines (VMs). This article
shows significant benefits of customized IO routing to
data center tenants: for example, a factor of 10 for
tail IO latency, more than 60\% better throughput for a
customized replication protocol, a factor of 2 in
throughput for customized caching, and enabling live
performance debugging in a running system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yuan:2017:WWR,
author = "Jun Yuan and Yang Zhan and William Jannen and Prashant
Pandey and Amogh Akshintala and Kanchan Chandnani and
Pooja Deo and Zardosht Kasheff and Leif Walsh and
Michael A. Bender and Martin Farach-Colton and Rob
Johnson and Bradley C. Kuszmaul and Donald E. Porter",
title = "Writes Wrought Right, and Other Adventures in File
System Optimization",
journal = j-TOS,
volume = "13",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3032969",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "File systems that employ write-optimized dictionaries
(WODs) can perform random-writes, metadata updates, and
recursive directory traversals orders of magnitude
faster than conventional file systems. However,
previous WOD-based file systems have not obtained all
of these performance gains without sacrificing
performance on other operations, such as file deletion,
file or directory renaming, or sequential writes. Using
three techniques, late-binding journaling, zoning, and
range deletion, we show that there is no fundamental
trade-off in write-optimization. These dramatic
improvements can be retained while matching
conventional file systems on all other operations.
BetrFS 0.2 delivers order-of-magnitude better
performance than conventional file systems on directory
scans and small random writes and matches the
performance of conventional file systems on rename,
delete, and sequential I/O. For example, BetrFS 0.2
performs directory scans $ 2.2 \times $ faster, and
small random writes over two orders of magnitude
faster, than the fastest conventional file system. But
unlike BetrFS 0.1, it renames and deletes files
commensurate with conventional file systems and
performs large sequential I/O at nearly disk bandwidth.
The performance benefits of these techniques extend to
applications as well. BetrFS 0.2 continues to
outperform conventional file systems on many
applications, such as rsync, git-diff, and tar, but
improves git-clone performance by 35\% over BetrFS 0.1,
yielding performance comparable to other file
systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Shin:2017:IAT,
author = "Ji-Yong Shin and Mahesh Balakrishnan and Tudor Marian
and Hakim Weatherspoon",
title = "{Isotope}: {ACID} Transactions for Block Storage",
journal = j-TOS,
volume = "13",
number = "1",
pages = "4:1--4:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3032967",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Existing storage stacks are top heavy and expect
little from block storage. As a result, new high-level
storage abstractions---and new designs for existing
abstractions---are difficult to realize, requiring
developers to implement from scratch complex
functionality such as failure atomicity and
fine-grained concurrency control. In this article, we
argue that pushing transactional isolation into the
block store (in addition to atomicity and durability)
is both viable and broadly useful, resulting in simpler
high-level storage systems that provide strong
semantics without sacrificing performance. We present
Isotope, a new block store that supports ACID
transactions over block reads and writes. Internally,
Isotope uses a new multiversion concurrency control
protocol that exploits fine-grained, subblock
parallelism in workloads and offers both strict
serializability and snapshot isolation guarantees. We
implemented several high-level storage systems over
Isotope, including two key-value stores that implement
the LevelDB API over a hash table and B-tree,
respectively, and a POSIX file system. We show that
Isotope's block-level transactions enable systems that
are simple (100s of lines of code), robust (i.e.,
providing ACID guarantees), and fast (e.g., 415MB/s for
random file writes). We also show that these systems
can be composed using Isotope, providing applications
with transactions across different high-level
constructs such as files, directories, and key-value
pairs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lu:2017:WSK,
author = "Lanyue Lu and Thanumalayan Sankaranarayana Pillai and
Hariharan Gopalakrishnan and Andrea C. Arpaci-Dusseau
and Remzi H. Arpaci-Dusseau",
title = "{WiscKey}: Separating Keys from Values in
{SSD}-Conscious Storage",
journal = j-TOS,
volume = "13",
number = "1",
pages = "5:1--5:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3033273",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We present WiscKey, a persistent LSM-tree-based
key-value store with a performance-oriented data layout
that separates keys from values to minimize I/O
amplification. The design of WiscKey is highly SSD
optimized, leveraging both the sequential and random
performance characteristics of the device. We
demonstrate the advantages of WiscKey with both
microbenchmarks and YCSB workloads. Microbenchmark
results show that WiscKey is $ 2.5 \times $ to $ 111
\times $ faster than LevelDB for loading a database
(with significantly better tail latencies) and $ 1.6
\times $ to $ 14 \times $ faster for random lookups.
WiscKey is faster than both LevelDB and RocksDB in all
six YCSB workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2017:CSN,
author = "Ning Li and Hong Jiang and Dan Feng and Zhan Shi",
title = "Customizable {SLO} and Its Near-Precise Enforcement
for Storage Bandwidth",
journal = j-TOS,
volume = "13",
number = "1",
pages = "6:1--6:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2998454",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Cloud service is being adopted as a utility for large
numbers of tenants by renting Virtual Machines (VMs).
But for cloud storage, unpredictable IO characteristics
make accurate Service-Level-Objective (SLO) enforcement
challenging. As a result, it has been very difficult to
support simple-to-use and technology-agnostic SLO
specifying a particular value for a specific metric
(e.g., storage bandwidth). This is because the quality
of SLO enforcement depends on performance error and
fluctuation that measure the precision of SLO
enforcement. High precision of SLO enforcement is
critical for user-oriented performance customization
and user experiences. To address this challenge, this
article presents V-Cup, a framework for VM-oriented
customizable SLO and its near-precise enforcement. It
consists of multiple auto-tuners, each of which exports
an interface for a tenant to customize the desired
storage bandwidth for a VM and enable the storage
bandwidth of the VM to converge on the target value
with a predictable precision. We design and implement
V-Cup in the Xen hypervisor based on the fair sharing
scheduler for VM-level resource management. Our V-Cup
prototype evaluation shows that it achieves satisfying
performance guarantees through near-precise SLO
enforcement.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Qi:2017:CLN,
author = "Shigui Qi and Dan Feng and Nan Su and Linjun Mei and
Jingning Liu",
title = "{CDF-LDPC}: a New Error Correction Method for {SSD}
to Improve the Read Performance",
journal = j-TOS,
volume = "13",
number = "1",
pages = "7:1--7:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3017430",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The raw error rate of a Solid-State Drive (SSD)
increases gradually with the increase of Program/Erase
(P/E) cycles, retention time, and read cycles.
Traditional approaches often use Error Correction Code
(ECC) to ensure the reliability of SSDs. For error-free
flash memory pages, time costs spent on ECC are
redundant and make read performance suboptimal. This
article presents a CRC-Detect-First LDPC (CDF-LDPC)
algorithm to optimize the read performance of SSDs. The
basic idea is to bypass Low-Density Parity-Check (LDPC)
decoding of error-free flash memory pages, which can be
found using a Cyclic Redundancy Check (CRC) code. Thus,
error-free pages can be read directly without
sacrificing the reliability of SSDs. Experiment results
show that the read performance is improved more than
50\% compared with traditional approaches. In
particular, when idle time of benchmarks and SSD
parallelism are exploited, CDF-LDPC can be performed
more efficiently. In this case, the read performance of
SSDs can be improved up to about 80\% compared to that
of the state of the art.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Joo:2017:ERI,
author = "Yongsoo Joo and Sangsoo Park and Hyokyung Bahn",
title = "Exploiting {I/O} Reordering and {I/O} Interleaving to
Improve Application Launch Performance",
journal = j-TOS,
volume = "13",
number = "1",
pages = "8:1--8:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3024094",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Application prefetchers improve application launch
performance through either I/O reordering or I/O
interleaving. However, there has been no proposal to
combine the two techniques together, missing the
opportunity for further optimization. We present a new
application prefetching technique to take advantage of
both the approaches. We evaluated our method with a set
of applications to demonstrate that it reduces cold
start application launch time by 50\%, which is an
improvement of 22\% from the I/O reordering
technique.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Qin:2017:DIR,
author = "Chuan Qin and Jingwei Li and Patrick P. C. Lee",
title = "The Design and Implementation of a Rekeying-Aware
Encrypted Deduplication Storage System",
journal = j-TOS,
volume = "13",
number = "1",
pages = "9:1--9:??",
month = mar,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3032966",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Mar 25 07:00:07 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Rekeying refers to an operation of replacing an
existing key with a new key for encryption. It renews
security protection to protect against key compromise
and enable dynamic access control in cryptographic
storage. However, it is non-trivial to realize
efficient rekeying in encrypted deduplication storage
systems, which use deterministic content-derived
encryption keys to allow deduplication on ciphertexts.
We design and implement a rekeying-aware encrypted
deduplication (REED) storage system. REED builds on a
deterministic version of all-or-nothing transform, such
that it enables secure and lightweight rekeying, while
preserving the deduplication capability. We propose two
REED encryption schemes that trade between performance
and security and extend REED for dynamic access
control. We implement a REED prototype with various
performance optimization techniques and demonstrate how
we can exploit similarity to mitigate key generation
overhead. Our trace-driven testbed evaluation shows
that our REED prototype maintains high performance and
storage efficiency.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Maltzahn:2017:ISI,
author = "Carlos Maltzahn and Vasily Tarasov",
title = "Introduction to the Special Issue on {MSST 2016}",
journal = j-TOS,
volume = "13",
number = "2",
pages = "10:1--10:??",
month = jun,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3078405",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Jun 10 16:10:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Ma:2017:LED,
author = "Jingwei Ma and Rebecca J. Stones and Yuxiang Ma and
Jingui Wang and Junjie Ren and Gang Wang and Xiaoguang
Liu",
title = "Lazy Exact Deduplication",
journal = j-TOS,
volume = "13",
number = "2",
pages = "11:1--11:??",
month = jun,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3078837",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Jun 10 16:10:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Deduplication aims to reduce duplicate data in storage
systems by removing redundant copies of data blocks,
which are compared to one another using fingerprints.
However, repeated on-disk fingerprint lookups lead to
high disk traffic, which results in a bottleneck. In
this article, we propose a ``lazy'' data deduplication
method, which buffers incoming fingerprints that are
used to perform on-disk lookups in batches, with the
aim of improving subsequent prefetching. In
deduplication in general, prefetching is used to
improve the cache hit rate by exploiting locality
within the incoming fingerprint stream. For lazy
deduplication, we design a buffering strategy that
preserves locality in order to facilitate prefetching.
Furthermore, as the proportion of deduplication time
spent on I/O decreases, the proportion spent on
fingerprint calculation and chunking increases. Thus,
we also utilize parallel approaches (utilizing multiple
CPU cores and a graphics processing unit) to further
improve the overall performance. Experimental results
indicate that the lazy method improves fingerprint
identification performance by over 50\% compared with
an ``eager'' method with the same data layout. The GPU
improves the hash calculation by a factor of 4.6 and
multithreaded chunking by a factor of 4.16.
Deduplication performance can be improved by over 45\%
on SSD and 80\% on HDD in the last round on the real
datasets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lee:2017:RWA,
author = "Eunji Lee and Julie Kim and Hyokyung Bahn and Sunjin
Lee and Sam H. Noh",
title = "Reducing Write Amplification of Flash Storage through
Cooperative Data Management with {NVM}",
journal = j-TOS,
volume = "13",
number = "2",
pages = "12:1--12:??",
month = jun,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3060146",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Jun 10 16:10:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Write amplification is a critical factor that limits
the stable performance of flash-based storage systems.
To reduce write amplification, this article presents a
new technique that cooperatively manages data in flash
storage and nonvolatile memory (NVM). Our scheme
basically considers NVM as the cache of flash storage,
but allows the original data in flash storage to be
invalidated if there is a cached copy in NVM, which can
temporarily serve as the original data. This scheme
eliminates the copy-out operation for a substantial
number of cached data, thereby enhancing garbage
collection efficiency. Simulated results show that the
proposed scheme reduces the copy-out overhead of
garbage collection by 51.4\% and decreases the standard
deviation of response time by 35.4\% on average.
Measurement results obtained by implementing the
proposed scheme in BlueDBM, an open-source flash
development platform developed by MIT, show that the
proposed scheme reduces the execution time and
increases IOPS by 2--21\% and 3--18\%, respectively,
for the workloads that we considered. This article is
an extended version of Lee et al. [2016], which was
presented at the 32nd International Conference on
Massive Data Storage Systems and Technology in 2016.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chen:2017:OFS,
author = "Cheng Chen and Jun Yang and Qingsong Wei and Chundong
Wang and Mingdi Xue",
title = "Optimizing File Systems with Fine-grained Metadata
Journaling on Byte-addressable {NVM}",
journal = j-TOS,
volume = "13",
number = "2",
pages = "13:1--13:??",
month = jun,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3060147",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Jun 10 16:10:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Journaling file systems have been widely adopted to
support applications that demand data consistency.
However, we observed that the overhead of journaling
can cause up to 48.2\% performance drop under certain
kinds of workloads. On the other hand, the emerging
high-performance, byte-addressable Non-volatile Memory
(NVM) has the potential to minimize such overhead by
being used as the journal device. The traditional
journaling mechanism based on block devices is
nevertheless unsuitable for NVM due to the write
amplification of metadata journal we observed. In this
article, we propose a fine-grained metadata journal
mechanism to fully utilize the low-latency
byte-addressable NVM so that the overhead of journaling
can be significantly reduced. Based on the observation
that conventional block-based metadata journal contains
up to 90\% clean metadata that is unnecessary to be
journalled, we design a fine-grained journal format for
byte-addressable NVM which contains only modified
metadata. Moreover, we redesign the process of
transaction committing, checkpointing, and recovery in
journaling file systems utilizing the new journal
format. Therefore, thanks to the reduced amount of
ordered writes for journals, the overhead of journaling
can be reduced without compromising the file system
consistency. To evaluate our fine-grained metadata
journaling mechanism, we have implemented a journaling
file system prototype based on Ext4 and JBD2 in Linux.
Experimental results show that our NVM-based
fine-grained metadata journaling is up to 15.8 $ \times
$ faster than the traditional approach under FileBench
workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhou:2017:UAI,
author = "You Zhou and Fei Wu and Ping Huang and Xubin He and
Changsheng Xie and Jian Zhou",
title = "Understanding and Alleviating the Impact of the Flash
Address Translation on Solid State Devices",
journal = j-TOS,
volume = "13",
number = "2",
pages = "14:1--14:??",
month = jun,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3051123",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Jun 10 16:10:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Flash-based solid state devices (SSDs) have been
widely employed in consumer and enterprise storage
systems. However, the increasing SSD capacity imposes
great pressure on performing efficient logical to
physical address translation in a page-level flash
translation layer (FTL). Existing schemes usually
employ a built-in RAM to store mapping information,
called mapping cache, to speed up the address
translation. Since only a fraction of the mapping table
can be cached due to limited cache space, a large
number of extra flash accesses are required for cache
management and garbage collection, degrading the
performance and lifetime of an SSD. In this paper, we
first apply analytical models to investigate the key
factors that incur extra flash accesses during address
translation. Then, we propose a novel page-level FTL
with an efficient translation page-level caching
mechanism, named TPFTL, to minimize the extra flash
accesses. TPFTL employs a two-level least recently used
(LRU) list with space-efficient optimizations to
organize cached mapping entries. Inspired by the
models, we further design a workload-adaptive loading
policy combined with an efficient replacement policy to
increase the cache hit rate and reduce the writebacks
of replaced dirty entries. Finally, we evaluate TPFTL
using extensive trace-driven simulations. Our
evaluation results show that compared to the
state-of-the-art FTLs, TPFTL significantly reduces the
extra operations caused by address translation,
achieving reductions on system response time and write
amplification by up to 27.1\% and 32.2\%,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Liu:2017:HPG,
author = "Qing Liu and Dan Feng and Yuchong Hu and Zhan Shi and
Min Fu",
title = "High-Performance General Functional Regenerating Codes
with Near-Optimal Repair Bandwidth",
journal = j-TOS,
volume = "13",
number = "2",
pages = "15:1--15:??",
month = jun,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3051122",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Jun 10 16:10:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Erasure codes are widely used in modern distributed
storage systems to prevent data loss and server
failures. Regenerating codes are a class of erasure
codes that trade storage efficiency and computation for
repair bandwidth reduction. However, their nonunified
coding parameters and huge computational overhead
prohibit their applications. Hence, we first propose a
family of General Functional Regenerating (GFR) codes
with uncoded repair, balancing storage efficiency and
repair bandwidth with general parameters. The GFR codes
take advantage of a heuristic repair algorithm, which
makes efforts to employ as little repair bandwidth as
possible to repair a single failure. Second, we also
present a scheduled shift multiplication (SSM)
algorithm, which accelerates the matrix product over
the Galois field by scheduling the order of coding
operations, so encoding and repairing of GFR codes can
be executed by fast bitwise shifting and exclusive-OR.
Compared to the traditional table-lookup multiplication
algorithm, our SSM algorithm gains $ 1.2 \times $ to $ 2 \times $
speedup in our experimental evaluations, with little effect on
the repair success rate.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hou:2017:UPB,
author = "Binbing Hou and Feng Chen and Zhonghong Ou and Ren
Wang and Michael Mesnier",
title = "Understanding {I/O} Performance Behaviors of Cloud
Storage from a Client's Perspective",
journal = j-TOS,
volume = "13",
number = "2",
pages = "16:1--16:??",
month = jun,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3078838",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Jun 10 16:10:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Cloud storage has gained increasing popularity in the
past few years. In cloud storage, data is stored in the
service provider's data centers, and users access data
via the network. For such a new storage model, our
prior wisdom about conventional storage may not remain
valid nor applicable to the emerging cloud storage. In
this article, we present a comprehensive study to gain
insight into the unique characteristics of cloud
storage and optimize user experiences with cloud
storage from a client's perspective. Unlike prior
measurement work that mostly aims to characterize cloud
storage providers or specific client applications, we
focus on analyzing the effects of various client-side
factors on the user-experienced performance. Through
extensive experiments and quantitative analysis, we
have obtained several important findings. For example,
we find that (1) a proper combination of parallelism
and request size can achieve optimized bandwidths, (2)
a client's capabilities and geographical location play
an important role in determining the end-to-end
user-perceivable performance, and (3) the interference
among mixed cloud storage requests may cause
performance degradation. Based on our findings, we
showcase a sampling- and inference-based method to
determine a proper combination for different
optimization goals. We further present a set of case
studies on client-side chunking and parallelization for
typical cloud-based applications. Our studies show that
specific attention should be paid to fully exploiting
the capabilities of clients and the great potential of
cloud storage services.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Iliadis:2017:EEQ,
author = "Ilias Iliadis and Jens Jelitto and Yusik Kim and
Slavisa Sarafijanovic and Vinodh Venkatesan",
title = "{ExaPlan}: Efficient Queueing-Based Data Placement,
Provisioning, and Load Balancing for Large Tiered
Storage Systems",
journal = j-TOS,
volume = "13",
number = "2",
pages = "17:1--17:??",
month = jun,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3078839",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Jun 10 16:10:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tos/;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Multi-tiered storage, where each tier consists of one
type of storage device (e.g., SSD, HDD, or disk
arrays), is a commonly used approach to achieve both
high performance and cost efficiency in large-scale
systems that need to store data with vastly different
access characteristics. By aligning the access
characteristics of the data, either fixed-sized extents
or variable-sized files, to the characteristics of the
storage devices, a higher performance can be achieved
for any given cost. This article presents ExaPlan, a
method to determine both the data-to-tier assignment
and the number of devices in each tier that minimize
the system's mean response time for a given budget and
workload. In contrast to other methods that constrain
or minimize the system load, ExaPlan directly minimizes
the system's mean response time estimated by a queueing
model. Minimizing the mean response time is typically
intractable as the resulting optimization problem is
both nonconvex and combinatorial in nature. ExaPlan
circumvents this intractability by introducing a
parameterized data placement approach that makes it a
highly scalable method that can be easily applied to
exascale systems. Through experiments that use
parameters from real-world storage systems, such as
CERN and LOFAR, it is demonstrated that ExaPlan
provides solutions that yield lower mean response times
than previous works. It supports standalone SSDs and
HDDs as well as disk arrays as storage tiers, and
although it uses a static workload representation, we
provide empirical evidence that underlying dynamic
workloads have invariant properties that can be deemed
static for the purpose of provisioning a storage
system. ExaPlan is also effective as a load-balancing
tool used for placing data across devices within a
tier, resulting in an up to 3.6-fold reduction of
response time compared with a traditional
load-balancing algorithm, such as the Longest
Processing Time heuristic.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "17",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kuenning:2017:ISI,
author = "Geoff Kuenning and Carl Waldspurger",
title = "Introduction to the Special Issue on {USENIX FAST
2017}",
journal = j-TOS,
volume = "13",
number = "3",
pages = "18:1--18:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3131620",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "18",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Pillai:2017:ACC,
author = "Thanumalayan Sankaranarayana Pillai and Ramnatthan
Alagappan and Lanyue Lu and Vijay Chidambaram and
Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau",
title = "Application Crash Consistency and Performance with
{CCFS}",
journal = j-TOS,
volume = "13",
number = "3",
pages = "19:1--19:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3119897",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Recent research has shown that applications often
incorrectly implement crash consistency. We present the
Crash-Consistent File System (ccfs), a file system that
improves the correctness of application-level crash
consistency protocols while maintaining high
performance. A key idea in ccfs is the abstraction of a
stream. Within a stream, updates are committed in
program order, improving correctness; across streams,
there are no ordering restrictions, enabling scheduling
flexibility and high performance. We empirically
demonstrate that applications running atop ccfs achieve
high levels of crash consistency. Further, we show that
ccfs performance under standard file-system benchmarks
is excellent, in the worst case on par with the highest
performing modes of Linux ext4, and in some cases
notably better. Overall, we demonstrate that both
application correctness and high performance can be
realized in a modern file system.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "19",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Ganesan:2017:RDI,
author = "Aishwarya Ganesan and Ramnatthan Alagappan and Andrea
C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau",
title = "Redundancy Does Not Imply Fault Tolerance: Analysis of
Distributed Storage Reactions to File-System Faults",
journal = j-TOS,
volume = "13",
number = "3",
pages = "20:1--20:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3125497",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We analyze how modern distributed storage systems
behave in the presence of file-system faults such as
data corruption and read and write errors. We
characterize eight popular distributed storage systems
and uncover numerous problems related to file-system
fault tolerance. We find that modern distributed
systems do not consistently use redundancy to recover
from file-system faults: a single file-system fault can
cause catastrophic outcomes such as data loss,
corruption, and unavailability. We also find that the
above outcomes arise due to fundamental problems in
file-system fault handling that are common across many
systems. Our results have implications for the design
of next-generation fault-tolerant distributed and cloud
storage systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "20",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chen:2017:VMN,
author = "Ming Chen and Geetika Babu Bangera and Dean Hildebrand
and Farhaan Jalia and Geoff Kuenning and Henry Nelson
and Erez Zadok",
title = "{vNFS}: Maximizing {NFS} Performance with Compounds
and Vectorized {I/O}",
journal = j-TOS,
volume = "13",
number = "3",
pages = "21:1--21:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3116213",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Modern systems use networks extensively, accessing
both services and storage across local and remote
networks. Latency is a key performance challenge, and
packing multiple small operations into fewer large ones
is an effective way to amortize that cost, especially
after years of significant improvement in bandwidth but
not latency. To this end, the NFSv4 protocol supports a
compounding feature to combine multiple operations. Yet
compounding has been underused since its conception
because the synchronous POSIX file-system API issues
only one (small) request at a time. We propose vNFS, an
NFSv4.1-compliant client that exposes a vectorized
high-level API and leverages NFS compound procedures to
maximize performance. We designed and implemented vNFS
as a user-space RPC library that supports an assortment
of bulk operations on multiple files and directories.
We found it easy to modify several UNIX utilities, an
HTTP/2 server, and Filebench to use vNFS. We evaluated
vNFS under a wide range of workloads and network
latency conditions, showing that vNFS improves
performance even for low-latency networks. On
high-latency networks, vNFS can improve performance by
as much as two orders of magnitude.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "21",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yan:2017:TTF,
author = "Shiqin Yan and Huaicheng Li and Mingzhe Hao and
Michael Hao Tong and Swaminathan Sundararaman and
Andrew A. Chien and Haryadi S. Gunawi",
title = "Tiny-Tail Flash: Near-Perfect Elimination of Garbage
Collection Tail Latencies in {NAND SSDs}",
journal = j-TOS,
volume = "13",
number = "3",
pages = "22:1--22:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3121133",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Flash storage has become the mainstream destination
for storage users. However, SSDs do not always deliver
the performance that users expect. The core culprit of
flash performance instability is the well-known garbage
collection (GC) process, which causes long delays as
the SSD cannot serve (blocks) incoming I/Os, which then
induces the long tail latency problem. We present
ttFlash as a solution to this problem. ttFlash is a
``tiny-tail'' flash drive (SSD) that eliminates
GC-induced tail latencies by circumventing GC-blocked
I/Os with four novel strategies: plane-blocking GC,
rotating GC, GC-tolerant read, and GC-tolerant flush.
These four strategies leverage the timely combination
of modern SSD internal technologies such as powerful
controllers, parity-based redundancies, and
capacitor-backed RAM. Our strategies are dependent on
the use of intra-plane copyback operations. Through an
extensive evaluation, we show that ttFlash comes
significantly close to a ``no-GC'' scenario.
Specifically, between the 99 and 99.99th percentiles,
ttFlash is only 1.0 to 2.6$ \times $ slower than the
no-GC case, while a base approach suffers from 5--138$
\times $ GC-induced slowdowns.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "22",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kesavan:2017:EFS,
author = "Ram Kesavan and Rohit Singh and Travis Grusecki and
Yuvraj Patel",
title = "Efficient Free Space Reclamation in {WAFL}",
journal = j-TOS,
volume = "13",
number = "3",
pages = "23:1--23:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3125647",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "NetApp$^{\reg}$ WAFL$^{\reg}$ is a transactional file
system that uses the copy-on-write mechanism to support
fast write performance and efficient snapshot creation.
However, copy-on-write increases the demand on the file
system to find free blocks quickly, which makes rapid
free space reclamation essential. Inability to find
free blocks quickly may impede allocations for incoming
writes. Efficiency is also important, because the task
of reclaiming free space may consume CPU and other
resources at the expense of client operations. In this
article, we describe the evolution (over more than a
decade) of the WAFL algorithms and data structures for
reclaiming space with minimal impact to the overall
performance of the storage appliance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "23",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2017:PDA,
author = "Cheng Li and Philip Shilane and Fred Douglis and Grant
Wallace",
title = "{Pannier}: Design and Analysis of a Container-Based
Flash Cache for Compound Objects",
journal = j-TOS,
volume = "13",
number = "3",
pages = "24:1--24:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3094785",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Classic caching algorithms leverage recency, access
count, and/or other properties of cached blocks at
per-block granularity. However, for media such as flash
which have performance and wear penalties for small
overwrites, implementing cache policies at a larger
granularity is beneficial. Recent research has focused
on buffering small blocks and writing in large
granularities, sometimes called containers, but it has
not explored the ramifications and best strategies for
caching compound blocks consisting of logically
distinct, but physically co-located, blocks. Containers
may have highly diverse blocks, with mixtures of
frequently accessed, infrequently accessed, and
invalidated blocks. We propose and evaluate Pannier, a
flash cache layer that provides high performance while
extending flash lifespan. Pannier uses three main
techniques: (1) leveraging block access counts to
manage cache containers, (2) incorporating block
liveness as a property to improve flash cache space
efficiency, and (3) designing a multi-step feedback
controller to ensure a flash cache reaches its desired
lifespan while maintaining performance. Our evaluation
shows that Pannier improves flash cache performance and
extends lifespan beyond previous per-block and
container-aware caching policies. More fundamentally,
our investigation highlights the importance of creating
new policies for caching compound blocks in flash.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "24",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chen:2017:EAM,
author = "Haibo Chen and Heng Zhang and Mingkai Dong and Zhaoguo
Wang and Yubin Xia and Haibing Guan and Binyu Zang",
title = "Efficient and Available In-Memory {KV}-Store with
Hybrid Erasure Coding and Replication",
journal = j-TOS,
volume = "13",
number = "3",
pages = "25:1--25:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3129900",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In-memory key/value store (KV-store) is a key building
block for many systems like databases and large
websites. Two key requirements for such systems are
efficiency and availability, which demand a KV-store to
continuously handle millions of requests per second. A
common approach to availability is using replication,
such as primary-backup (PBR), which, however, requires
$ M + 1 $ times memory to tolerate $M$ failures. This renders
scarce memory unable to handle useful user jobs. This
article makes the first case of building highly
available in-memory KV-store by integrating erasure
coding to achieve memory efficiency, while not notably
degrading performance. A main challenge is that an
in-memory KV-store has much scattered metadata. A
single KV put may cause excessive coding operations and
parity updates due to excessive small updates to
metadata. Our approach, namely Cocytus, addresses this
challenge by using a hybrid scheme that leverages PBR
for small-sized and scattered data (e.g., metadata and
key), while only applying erasure coding to relatively
large data (e.g., value). To mitigate well-known issues
like lengthy recovery of erasure coding, Cocytus uses
an online recovery scheme by leveraging the replicated
metadata information to continuously serve KV requests.
To further demonstrate the usefulness of Cocytus, we
have built a transaction layer by using Cocytus as a
fast and reliable storage layer to store database
records and transaction logs. We have integrated the
design of Cocytus to Memcached and extend it to support
in-memory transactions. Evaluation using YCSB with
different KV configurations shows that Cocytus incurs
low overhead for latency and throughput, can tolerate
node failures with fast online recovery, while saving
33\% to 46\% memory compared to PBR when tolerating two
failures. A further evaluation using the SmallBank OLTP
benchmark shows that in-memory transactions can run
atop Cocytus with high throughput, low latency, and low
abort rate and recover fast from consecutive
failures.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "25",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Liu:2017:SEC,
author = "Qing Liu and Dan Feng and Hong Jiang and Yuchong Hu
and Tianfeng Jiao",
title = "Systematic Erasure Codes with Optimal Repair Bandwidth
and Storage",
journal = j-TOS,
volume = "13",
number = "3",
pages = "26:1--26:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3109479",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Erasure codes are widely used in distributed storage
systems to prevent data loss. Traditional codes suffer
from a typical repair-bandwidth problem in which the
amount of data required to reconstruct the lost data,
referred to as the repair bandwidth, is often far more
than the theoretical minimum. While many novel codes
have been proposed in recent years to reduce the repair
bandwidth, these codes either require extra storage and
computation overhead or are only applicable to some
special cases. To address the weaknesses of the
existing solutions to the repair-bandwidth problem, we
propose Z Codes, a general family of codes capable of
achieving the theoretical lower bound of repair
bandwidth versus storage. To the best of our knowledge,
the Z codes are the first general systematic erasure
codes that jointly achieve optimal repair bandwidth and
storage. Further, we generalize the Z codes to the GZ
codes to gain the Maximum Distance Separable (MDS)
property. Our evaluations of a real system indicate
that Z/GZ and Reed--Solomon (RS) codes show
approximately close encoding and repairing speeds,
while GZ codes achieve over 37.5\% response time
reduction for repairing the same size of data, compared
to the RS and Cauchy Reed--Solomon (CRS) codes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "26",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Viotti:2017:HRH,
author = "Paolo Viotti and Dan Dobre and Marko Vukoli{\'c}",
title = "{Hybris}: Robust Hybrid Cloud Storage",
journal = j-TOS,
volume = "13",
number = "3",
pages = "27:1--27:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3119896",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Oct 30 08:04:10 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Besides well-known benefits, commodity cloud storage
also raises concerns that include security,
reliability, and consistency. We present Hybris
key-value store, the first robust hybrid cloud storage
system, aiming at addressing these concerns leveraging
both private and public cloud resources. Hybris
robustly replicates metadata on trusted private
premises (private cloud), separately from data, which
are dispersed (using replication or erasure coding)
across multiple untrusted public clouds. Hybris
maintains metadata stored on private premises at the
order of few dozens of bytes per key, avoiding the
scalability bottleneck at the private cloud. In turn,
the hybrid design allows Hybris to efficiently and
robustly tolerate cloud outages but also potential
malice in clouds without overhead. Namely, to tolerate
up to $f$ malicious clouds, in the common case of the
Hybris variant with data replication, writes replicate
data across $ f + 1$ clouds, whereas reads involve a
single cloud. In the worst case, only up to $f$
additional clouds are used. This is considerably better
than earlier multi-cloud storage systems that required
costly $ 3 f + 1$ clouds to mask $f$ potentially
malicious clouds. Finally, Hybris leverages strong
metadata consistency to guarantee to Hybris
applications strong data consistency without any
modifications to the eventually consistent public
clouds. We implemented Hybris in Java and evaluated it
using a series of micro and macro-benchmarks. Our
results show that Hybris significantly outperforms
comparable multi-cloud storage systems and approaches
the performance of bare-bone commodity public cloud
storage.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "27",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Parker-Wood:2017:ISI,
author = "Aleatha Parker-Wood and Thomas Schwarz",
title = "Introduction to the {Special Issue on Massive Storage
Systems and Technology 2017}",
journal = j-TOS,
volume = "13",
number = "4",
pages = "28:1--28:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3148596",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "28",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yao:2017:BEK,
author = "Ting Yao and Jiguang Wan and Ping Huang and Xubin He
and Fei Wu and Changsheng Xie",
title = "Building Efficient Key--Value Stores via a Lightweight
Compaction Tree",
journal = j-TOS,
volume = "13",
number = "4",
pages = "29:1--29:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3139922",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Log-Structure Merge tree (LSM-tree) has been one of
the mainstream indexes in key-value systems supporting
a variety of write-intensive Internet applications in
today's data centers. However, the performance of
LSM-tree is seriously hampered by constantly occurring
compaction procedures, which incur significant write
amplification and degrade the write throughput. To
alleviate the performance degradation caused by
compactions, we introduce a lightweight compaction tree
(LWC-tree), a variant of LSM-tree index optimized for
minimizing the write amplification and maximizing the
system throughput. The lightweight compaction
drastically decreases write amplification by appending
data in a table and only merging the metadata that have
much smaller size. Using our proposed LWC-tree, we have
implemented three key-value LWC-stores on different
storage mediums including Shingled Magnetic Recording
(SMR) drives, Solid State Drives (SSD), and
conventional Hard Disk Drives (HDDs). The LWC-store is
particularly optimized for SMR drives, as it eliminates
the multiplicative I/O amplification from both
LSM-trees and SMR drives. Due to the lightweight
compaction procedure, LWC-store reduces the write
amplification by a factor of up to 5$ \times $ compared
to the popular LevelDB key-value store. Moreover, the
random write throughput of the LWC-tree on SMR drives
is significantly improved by up to 467\% even compared
with LevelDB on conventional HDDs. Furthermore,
LWC-tree has wide applicability and delivers impressive
performance improvement in various conditions,
including different storage mediums (i.e., SMR, HDD,
SSD) and various value sizes and access patterns (i.e.,
uniform and Zipfian).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "29",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Liu:2017:OWL,
author = "Qingyue Liu and Peter Varman",
title = "{Ouroboros} Wear Leveling for {NVRAM} Using
Hierarchical Block Migration",
journal = j-TOS,
volume = "13",
number = "4",
pages = "30:1--30:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3139530",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Emerging nonvolatile RAM (NVRAM) technologies have a
limit on the number of writes that can be made to any
cell, similar to the erasure limits in NAND Flash. This
motivates the need for wear leveling techniques to
distribute the writes evenly among the cells. Unlike
NAND Flash, cells in NVRAM can be rewritten without the
need for erasing the entire containing block, avoiding
the issues of space reclamation and garbage collection,
motivating alternate approaches to the problem. In this
article, we propose a hierarchical wear-leveling model
called Ouroboros wear leveling. Ouroboros uses a
two-level strategy whereby frequent low-cost
intraregion wear leveling at small granularity is
combined with interregion wear leveling at a larger
time interval and granularity. Ouroboros is a hybrid
migration scheme that exploits correct demand
predictions in making better wear-leveling decisions
while using randomization to avoid wear-leveling
attacks by deterministic access patterns. We also
propose a way to optimize wear-leveling parameter
settings to meet a target smoothness level under
limited time and space overhead constraints for
different memory architectures and trace
characteristics. Several experiments are performed on
synthetically generated memory traces with special
characteristics, two block-level storage traces, and
two memory-line-level memory traces. The results show
that Ouroboros wear leveling can distribute writes
smoothly across the whole NVRAM with no more than 0.2\%
space overhead and 0.52\% time overhead for a 512 GB
memory.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "30",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yadgar:2017:ETY,
author = "Gala Yadgar and Roman Shor",
title = "Experience from Two Years of Visualizing Flash with
{SSDPlayer}",
journal = j-TOS,
volume = "13",
number = "4",
pages = "31:1--31:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149356",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Data visualization is a thriving field of computer
science, with widespread impact on diverse scientific
disciplines, from medicine and meteorology to visual
data mining. Advances in large-scale storage systems,
as well as low-level storage technology, played a
significant role in accelerating the applicability and
adoption of modern visualization techniques.
Ironically, ``the cobbler's children have no shoes'':
Researchers who wish to analyze storage systems and
devices are usually limited to a variety of static
histograms and basic displays. The dynamic nature of
data movement on flash has motivated the introduction
of SSDPlayer, a graphical tool for visualizing the
various processes that cause data movement on
solid-state drives (SSDs). In 2015, we used the initial
version of SSDPlayer to demonstrate how visualization
can assist researchers and developers in their
understanding of modern, complex flash-based systems.
While we continued to use SSDPlayer for analysis
purposes, we found it extremely useful for education
and presentation purposes as well. In this article, we
describe our experience from two years of using,
sharing, and extending SSDPlayer and how similar
techniques can further advance storage systems research
and education.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "31",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kim:2017:SSU,
author = "Dongjin Kim and Kyu Ho Park and Chan-Hyun Youn",
title = "{SUPA}: a Single Unified Read-Write Buffer and
Pattern-Change-Aware {FTL} for the High Performance of
Multi-Channel {SSD}",
journal = j-TOS,
volume = "13",
number = "4",
pages = "32:1--32:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3129901",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "To design the write buffer and flash translation layer
(FTL) for a solid-state drive (SSD), previous studies
have tried to increase overall SSD performance by
parallel I/O and garbage collection overhead reduction.
Recent works have proposed pattern-based managements,
which uses the request size and read- or
write-intensiveness to apply different policies to each
type of data. In our observation, the locations of read
and write requests are closely related, and the pattern
of each type of data can be changed. In this work, we
propose SUPA, a single unified read-write buffer and
pattern-change-aware FTL on multi-channel SSD
architecture. To increase both read and write hit
ratios on the buffer based on locality, we use a single
unified read-write buffer for both clean and dirty
blocks. With proposed buffer, we can increase buffer
hit ratio up to 8.0\% and reduce 33.6\% and 7.5\% of
read and write latencies, respectively. To handle
pattern-changed blocks, we add a pattern handler
between the buffer and the FTL, which monitors channel
status and handles data by applying one of the two
different policies according to the pattern changes.
With pattern change handling process, we can reduce
1.0\% and 15.4\% of read and write latencies,
respectively. In total, our evaluations show that SUPA
can get up to 2.0 and 3.9 times less read and write
latency, respectively, without loss of lifetime in
comparison to previous works.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "32",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hu:2017:ORL,
author = "Yuchong Hu and Xiaolu Li and Mi Zhang and Patrick P.
C. Lee and Xiaoyang Zhang and Pan Zhou and Dan Feng",
title = "Optimal Repair Layering for Erasure-Coded Data
Centers: From Theory to Practice",
journal = j-TOS,
volume = "13",
number = "4",
pages = "33:1--33:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149349",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Repair performance in hierarchical data centers is
often bottlenecked by cross-rack network transfer.
Recent theoretical results show that the cross-rack
repair traffic can be minimized through repair
layering, whose idea is to partition a repair operation
into inner-rack and cross-rack layers. However, how
repair layering should be implemented and deployed in
practice remains an open issue. In this article, we
address this issue by proposing a practical repair
layering framework called DoubleR. We design two
families of practical double regenerating codes (DRC),
which not only minimize the cross-rack repair traffic
but also have several practical properties that improve
state-of-the-art regenerating codes. We implement and
deploy DoubleR atop the Hadoop Distributed File System
(HDFS) and show that DoubleR maintains the theoretical
guarantees of DRC and improves the repair performance
of regenerating codes in both node recovery and
degraded read operations.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "33",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zeng:2017:CCS,
author = "Lingfang Zeng and Zehao Zhang and Yang Wang and Dan
Feng and Kenneth B. Kent",
title = "{CosaFS}: a Cooperative Shingle-Aware File System",
journal = j-TOS,
volume = "13",
number = "4",
pages = "34:1--34:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149482",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In this article, we design and implement a cooperative
shingle-aware file system, called CosaFS, on
heterogeneous storage devices that mix solid-state
drives (SSDs) and shingled magnetic recording (SMR)
technology to improve the overall performance of
storage systems. The basic idea of CosaFS is to
classify objects as hot or cold objects based on a
proposed Lookahead with Recency Weight scheme. If an
object is identified as a hot (small) object, then it
will be served by SSD. Otherwise, cold (large) objects
are stored on SMR. For an SMR, large objects can be
accessed in large sequential blocks, rendering the
performance of their accesses comparable with that of
accessing the same large sequential blocks as if they
were stored on a hard drive. Small objects, such as
inodes and directories, are stored on the SSD where
``seeks'' for such objects are nearly free. With
thorough empirical studies, we demonstrate that CosaFS,
as a cooperative shingle-aware file system, with
metadata separation and cache-assistance, is a very
effective way to handle the disk-based data demanded by
the shingled writes and outperforms the device- and
host-side shingle-aware file systems in terms of
throughput, IOPS, and access latency as well.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "34",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Einziger:2017:THE,
author = "Gil Einziger and Roy Friedman and Ben Manes",
title = "{TinyLFU}: a Highly Efficient Cache Admission Policy",
journal = j-TOS,
volume = "13",
number = "4",
pages = "35:1--35:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149371",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "This article proposes to use a frequency-based cache
admission policy in order to boost the effectiveness of
caches subject to skewed access distributions. Given a
newly accessed item and an eviction candidate from the
cache, our scheme decides, based on the recent access
history, whether it is worth admitting the new item
into the cache at the expense of the eviction
candidate. This concept is enabled through a novel
approximate LFU structure called TinyLFU, which
maintains an approximate representation of the access
frequency of a large sample of recently accessed items.
TinyLFU is very compact and lightweight as it builds
upon Bloom filter theory. We study the properties of
TinyLFU through simulations of both synthetic workloads
and multiple real traces from several sources. These
simulations demonstrate the performance boost obtained
by enhancing various replacement policies with the
TinyLFU admission policy. Also, a new combined
replacement and eviction policy scheme nicknamed
W-TinyLFU is presented. W-TinyLFU is demonstrated to
obtain equal or better hit ratios than other
state-of-the-art replacement policies on these traces.
It is the only scheme to obtain such good results on
all traces.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "35",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hatzieleftheriou:2017:CSJ,
author = "Andromachi Hatzieleftheriou and Stergios V.
Anastasiadis",
title = "Client-Side Journaling for Durable Shared Storage",
journal = j-TOS,
volume = "13",
number = "4",
pages = "36:1--36:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149372",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Hardware consolidation in the datacenter often leads
to scalability bottlenecks from heavy utilization of
critical resources, such as the storage and network
bandwidth. Client-side caching on durable media is
already applied at block level to reduce the storage
backend load but has received criticism for added
overhead, restricted sharing, and possible data loss at
client crash. We introduce a journal to the
kernel-level client of an object-based distributed
filesystem to improve durability at high I/O
performance and reduced shared resource utilization.
Storage virtualization at the file interface achieves
clear consistency semantics across data and metadata,
supports native file sharing among clients, and
provides flexible configuration of durable data staging
at the host. Over a prototype that we have implemented,
we experimentally quantify the performance and
efficiency of the proposed Arion system in comparison
to a production system. We run microbenchmarks and
application-level workloads over a local cluster and a
public cloud. We demonstrate reduced latency by 60\%
and improved performance up to 150\% at reduced server
network and disk bandwidth by 41\% and 77\%,
respectively. The performance improvement reaches 92\%
for 16 relational databases as clients and gets as high
as 11.3x with two key-value stores as clients.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "36",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kim:2017:GED,
author = "Sang-Hoon Kim and Jinhyuk Lee and Jin-Soo Kim",
title = "{GCMix}: an Efficient Data Protection Scheme against
the Paired Page Interference",
journal = j-TOS,
volume = "13",
number = "4",
pages = "37:1--37:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149373",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In multi-level cell (MLC) NAND flash memory, two
logical pages are overlapped on a single physical page.
Even after a logical page is programmed, the data can
be corrupted if the programming of the coexisting
logical page is interrupted. This phenomenon is called
paired page interference. This article proposes a novel
software technique to deal with the paired page
interference without any additional hardware or extra
page write. The proposed technique utilizes valid pages
in the victim block during garbage collection (GC) as
the backup against the interference, and pairs them
with incoming pages written by the host. This approach
eliminates undesirable page copy to backup pages
against the interference. However, such a strategy has
an adverse effect on the hot/cold separation policy,
which is essential to improve the efficiency of GC. To
limit the downside, we devise a metric to estimate the
benefit of GCMix on-the-fly so that GCMix can be
adaptively utilized only when the benefit outweighs the
overhead. Evaluations using synthetic and real
workloads show GCMix can effectively deal with the
paired page interference, reducing the write
amplification factor by up to 17.5\% compared to the
traditional technique, while providing comparable I/O
performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "37",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Shafaei:2017:MDM,
author = "Mansour Shafaei and Mohammad Hossein Hajkazemi and
Peter Desnoyers and Abutalib Aghayev",
title = "Modeling Drive-Managed {SMR} Performance",
journal = j-TOS,
volume = "13",
number = "4",
pages = "38:1--38:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3139242",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Accurately modeling drive-managed Shingled Magnetic
Recording (SMR) disks is a challenge, requiring an
array of approaches including both existing disk
modeling techniques as well as new techniques for
inferring internal translation layer algorithms. In
this work, we present the first predictive simulation
model of a generally available drive-managed SMR disk.
Despite the use of unknown proprietary algorithms in
this device, our model that is derived from external
measurements is able to predict mean latency within a
few percent, and with a Root Mean Square (RMS)
cumulative latency error of 25\% or less for most
workloads tested. These variations, although not small,
are in most cases less than three times the
drive-to-drive variation seen among seemingly identical
drives.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "38",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Haghdoost:2017:HSR,
author = "Alireza Haghdoost and Weiping He and Jerry Fredin and
David H. C. Du",
title = "{\tt hfplayer}: Scalable Replay for Intensive Block
{I/O} Workloads",
journal = j-TOS,
volume = "13",
number = "4",
pages = "39:1--39:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149392",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We introduce new methods to replay intensive block I/O
workloads more accurately. These methods can be used to
reproduce realistic workloads for benchmarking,
performance validation, and tuning of a
high-performance block storage device/system. In this
article, we study several sources in the stock
operating system that introduce uncertainty in the
workload replay. Based on the remedies of these
findings, we design and develop a new replay tool
called hfplayer that replays intensive block I/O
workloads in a similar unscaled environment with more
accuracy. To replay a given workload trace in a scaled
environment with faster storage or host server, the
dependency between I/O requests becomes crucial since
the timing and ordering of I/O requests is expected to
change according to these dependencies. Therefore, we
propose a heuristic way of speculating I/O dependencies
in a block I/O trace. Using the generated dependency
graph, hfplayer tries to propagate I/O related
performance gains appropriately along the I/O
dependency chains and mimics the original application
behavior when it executes in a scaled environment with
slower or faster storage system and servers. We
evaluate hfplayer with a wide range of workloads using
several accuracy metrics and find that it produces
better accuracy when compared to other replay
approaches.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "39",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hou:2017:GLL,
author = "Binbing Hou and Feng Chen",
title = "{GDS-LC}: a Latency- and Cost-Aware Client Caching
Scheme for Cloud Storage",
journal = j-TOS,
volume = "13",
number = "4",
pages = "40:1--40:??",
month = dec,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149374",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Dec 22 18:16:19 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Successfully integrating cloud storage as a primary
storage layer in the I/O stack is highly challenging.
This is essentially due to two inherent critical
issues: the high and variant cloud I/O latency and the
per-I/O pricing model of cloud storage. To minimize the
associated latency and monetary cost with cloud I/Os,
caching is a crucial technology, as it directly
influences how frequently the client has to communicate
with the cloud. Unfortunately, current cloud caching
schemes are mostly designed to optimize miss reduction
as the sole objective and only focus on improving
system performance while ignoring the fact that various
cache misses could have completely distinct effects in
terms of latency and monetary cost. In this article, we
present a cost-aware caching scheme, called GDS-LC,
which is highly optimized for cloud storage caching.
Different from traditional caching schemes that merely
focus on improving cache hit ratios and the classic
cost-aware schemes that can only achieve a single
optimization target, GDS-LC offers a comprehensive
cache design by considering not only the access
locality but also the object size, associated latency,
and price, aiming at enhancing the user experience with
cloud storage from two aspects: access latency and
monetary cost. To achieve this, GDS-LC virtually
partitions the cache space into two regions: a
high-priority latency-aware region and a low-priority
price-aware region. Each region is managed by a
cost-aware caching scheme, which is based on
GreedyDual-Size (GDS) and designed for a cloud storage
scenario by adopting clean-dirty differentiation and
latency normalization. The GDS-LC framework is highly
flexible, and we present a further enhanced algorithm,
called GDS-LCF, by incorporating access frequency in
caching decisions. We have built a prototype to emulate
a typical cloud client cache and evaluate GDS-LC and
GDS-LCF with Amazon Simple Storage Services (S3) in
three different scenarios: local cloud, Internet cloud,
and heterogeneous cloud. Our experimental results show
that our caching schemes can effectively achieve both
optimization goals: low access latency and low monetary
cost. It is our hope that this work can inspire the
community to reconsider the cache design in the cloud
environment, especially for the purpose of integrating
cloud storage into the current storage stack as a
primary layer.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "40",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Noh:2018:ECL,
author = "Sam H. Noh",
title = "{Editor-in-Chief} Letter",
journal = j-TOS,
volume = "14",
number = "1",
pages = "1:1--1:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3180478",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Xue:2018:ISI,
author = "Chun Jason Xue and Michael Swift",
title = "Introduction to the Special Issue on {NVM} and
Storage",
journal = j-TOS,
volume = "14",
number = "1",
pages = "2:1--2:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3180480",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chen:2018:UUS,
author = "Shuo-Han Chen and Tseng-Yi Chen and Yuan-Hao Chang and
Hsin-Wen Wei and Wei-Kuan Shih",
title = "{UnistorFS}: a Union Storage File System Design for
Resource Sharing between Memory and Storage on
Persistent {RAM}-Based Systems",
journal = j-TOS,
volume = "14",
number = "1",
pages = "3:1--3:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3177918",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "With the advanced technology in persistent random
access memory (PRAM), PRAM such as three-dimensional
XPoint memory and Phase Change Memory (PCM) is emerging
as a promising candidate for the next-generation medium
for both (main) memory and storage. Previous works
mainly focus on how to overcome the possible endurance
issues of PRAM while both main memory and storage own a
partition on the same PRAM device. However, a holistic
software-level system design should be proposed to
fully exploit the benefit of PRAM. This article
proposes a union storage file system (UnistorFS), which
aims to jointly manage the PRAM resource for main
memory and storage. The proposed UnistorFS realizes the
concept of using the PRAM resource as memory and
storage interchangeably to achieve resource sharing
while main memory and storage coexist on the same PRAM
device with no partition or logical boundary. This
approach not only enables PRAM resource sharing but
also eliminates unnecessary data movements between main
memory and storage since they are already in the same
address space and can be accessed directly. At the same
time, the proposed UnistorFS ensures the persistence of
file data and sanity of the file system after power
recycling. A series of experiments was conducted on a
modified Linux kernel. The results show that the
proposed UnistorFS can eliminate unnecessary memory
accesses and outperform other PRAM-based file systems
for 0.2--8.7 times in terms of read/write
performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Chen:2018:HPM,
author = "Youmin Chen and Jiwu Shu and Jiaxin Ou and Youyou Lu",
title = "{HiNFS}: a Persistent Memory File System with Both
Buffering and Direct-Access",
journal = j-TOS,
volume = "14",
number = "1",
pages = "4:1--4:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3204454",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Persistent memory provides data persistence at main
memory with emerging non-volatile main memories
(NVMMs). Recent persistent memory file systems
aggressively use direct access, which directly copy
data between user buffer and the storage layer, to
avoid the double-copy overheads through the OS page
cache. However, we observe they all suffer from slow
writes due to NVMMs' asymmetric read-write performance
and much slower performance than DRAM. In this article,
we propose HiNFS, a high-performance file system for
non-volatile main memory, to combine both buffering and
direct access for fine-grained file system operations.
HiNFS uses an NVMM-aware Write Buffer to buffer the
lazy-persistent file writes in DRAM, while performing
direct access to NVMM for eager-persistent file writes.
It directly reads file data from both DRAM and NVMM, by
ensuring read consistency with a combination of the
DRAM Block Index and Cacheline Bitmap to track the
latest data between DRAM and NVMM. HiNFS also employs a
Buffer Benefit Model to identify the eager-persistent
file writes before issuing I/Os. Evaluations show that
HiNFS significantly improves throughput by up to 184\%
and reduces execution time by up to 64\% comparing with
state-of-the-art persistent memory file systems PMFS
and EXT4-DAX.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kim:2018:CTC,
author = "Wook-Hee Kim and Jihye Seo and Jinwoong Kim and
Beomseok Nam",
title = "{clfB-tree}: Cacheline Friendly Persistent {B}-tree
for {NVRAM}",
journal = j-TOS,
volume = "14",
number = "1",
pages = "5:1--5:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3129263",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Emerging byte-addressable non-volatile memory (NVRAM)
is expected to replace block device storages as an
alternative low-latency persistent storage device. If
NVRAM is used as a persistent storage device, a cache
line instead of a disk page will be the unit of data
transfer, consistency, and durability. In this work, we
design and develop clfB-tree---a B-tree structure whose
tree node fits in a single cache line. We employ
existing write combining store buffer and restricted
transactional memory to provide a failure-atomic cache
line write operation. Using the failure-atomic cache
line write operations, we atomically update a clfB-tree
node via a single cache line flush instruction without
major changes in hardware. However, there exist many
processors that do not provide SW interface for
transactional memory. For those processors, our
proposed clfB-tree achieves atomicity and consistency
via in-place update, which requires maximum four cache
line flushes. We evaluate the performance of clfB-tree
on an NVRAM emulation board with ARM Cortex A-9
processor and a workstation that has Intel Xeon E7-4809
v3 processor. Our experimental results show clfB-tree
outperforms wB-tree and CDDS B-tree by a large margin
in terms of both insertion and search performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wang:2018:PRT,
author = "Chundong Wang and Qingsong Wei and Lingkun Wu and Sibo
Wang and Cheng Chen and Xiaokui Xiao and Jun Yang and
Mingdi Xue and Yechao Yang",
title = "Persisting {RB-Tree} into {NVM} in a Consistency
Perspective",
journal = j-TOS,
volume = "14",
number = "1",
pages = "6:1--6:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3177915",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Byte-addressable non-volatile memory (NVM) is going to
reshape conventional computer systems. With advantages
of low latency, byte-addressability, and
non-volatility, NVM can be directly put on the memory
bus to replace DRAM. As a result, both system and
application software have to be adjusted to perceive
the fact that the persistent layer moves up to the
memory. However, most of the current in-memory data
structures will be problematic with consistency issues
if not well tuned with NVM. This article places
emphasis on an important in-memory structure that is
widely used in computer systems, i.e., the
Red/Black-tree (RB-tree). Since it has a long and
complicated update process, the RB-tree is prone to
inconsistency problems with NVM. This article presents
an NVM-compatible consistent RB-tree with a new
technique named cascade-versioning. The proposed
RB-tree (i) is all-time consistent and scalable and
(ii) needs no recovery procedure after system crashes.
Experiment results show that the RB-tree for NVM not
only achieves the aim of consistency with insignificant
spatial overhead but also yields comparable performance
to an ordinary volatile RB-tree.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Liu:2018:DDT,
author = "Mengxing Liu and Mingxing Zhang and Kang Chen and
Xuehai Qian and Yongwei Wu and Weimin Zheng and Jinglei
Ren",
title = "{DudeTx}: Durable Transactions Made Decoupled",
journal = j-TOS,
volume = "14",
number = "1",
pages = "7:1--7:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3177920",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Emerging non-volatile memory (NVM) offers
non-volatility, byte-addressability, and fast access at
the same time. It is suggested that programs should
access NVM directly through CPU load and store
instructions. To guarantee crash consistency, durable
transactions are regarded as a common choice of
applications for accessing persistent memory data.
However, existing durable transaction systems employ
either undo logging, which requires a fence for every
memory write, or redo logging, which requires
intercepting all memory reads within transactions. Both
approaches incur significant overhead. This article
presents DudeTx, a crash-consistent durable transaction
system that avoids the drawbacks of both undo and redo
logging. DudeTx uses shadow DRAM to decouple the
execution of a durable transaction into three fully
asynchronous steps. The advantage is that only minimal
fences and no memory read instrumentation are required.
This design enables an out-of-the-box concurrency
control mechanism, transactional memory or fine-grained
locks, to be used as an independent component. The
evaluation results show that DudeTx adds durability to
a software transactional memory system with only
7.4\%--24.6\% throughput degradation. Compared to
typical existing durable transaction systems, DudeTx
provides 1.7$ \times $--4.4$ \times $ higher
throughput. Moreover, DudeTx can be implemented with
hardware transactional memory or lock-based concurrency
control, leading to a further 1.7$ \times $ and 3.3$
\times $ speedup, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Sun:2018:BDS,
author = "Yuliang Sun and Yu Wang and Huazhong Yang",
title = "Bidirectional Database Storage and {SQL} Query
Exploiting {RRAM}-Based Process-in-Memory Structure",
journal = j-TOS,
volume = "14",
number = "1",
pages = "8:1--8:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3177917",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "With the coming of the ``Big Data'' era, a
high-energy-efficiency database is demanded for the
Internet of things (IoT) application scenarios. The
emerging Resistive Random Access Memory (RRAM) has been
considered as an energy-efficient replacement of DRAM
for next-generation main memory. In this article, we
propose an RRAM-based SQL query unit with
process-in-memory (PIM) characteristics. A
bidirectional storage structure for a database in RRAM
crossbar array is proposed that avoids redundant data
transfer to cache and reduces cache miss rate compared
with the storage method in DRAM for an in-memory
database. The proposed RRAM-based SQL query unit can
support a representative subset of SQL queries in
memory and thus can further reduce the data transfer
cost. The corresponding query optimization method is
proposed to fully utilize the PIM characteristics.
Simulation results show that the energy efficiency of
the proposed RRAM-based SQL query unit is increased by
4 to 6 orders of magnitude compared with the
traditional architecture.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Han:2018:NRB,
author = "Lei Han and Zhaoyan Shen and Duo Liu and Zili Shao and
H. Howie Huang and Tao Li",
title = "A Novel {ReRAM}-Based Processing-in-Memory
Architecture for Graph Traversal",
journal = j-TOS,
volume = "14",
number = "1",
pages = "9:1--9:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3177916",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Graph algorithms such as graph traversal have been
gaining ever-increasing importance in the era of big
data. However, graph processing on traditional
architectures issues many random and irregular memory
accesses, leading to a huge number of data movements
and the consumption of very large amounts of energy. To
minimize the waste of memory bandwidth, we investigate
utilizing processing-in-memory (PIM), combined with
non-volatile metal-oxide resistive random access memory
(ReRAM), to improve both computation and I/O
performance. We propose a new ReRAM-based
processing-in-memory architecture called RPBFS, in
which graph data can be persistently stored and
processed in place. We study the problem of graph
traversal, and we design an efficient graph traversal
algorithm in RPBFS. Benefiting from low data movement
overhead and high bank-level parallel computation,
RPBFS shows a significant performance improvement
compared with both the CPU-based and the GPU-based BFS
implementations. On a suite of real-world graphs, our
architecture yields a speedup in graph traversal
performance of up to 33.8$ \times $, and achieves a
reduction in energy over conventional systems of up to
142.8$ \times $.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yadgar:2018:AFP,
author = "Gala Yadgar and Eitan Yaakobi and Fabio Margaglia and
Yue Li and Alexander Yucovich and Nachum Bundak and
Lior Gilon and Nir Yakovi and Assaf Schuster and
Andr{\'e} Brinkmann",
title = "An Analysis of Flash Page Reuse With {WOM} Codes",
journal = j-TOS,
volume = "14",
number = "1",
pages = "10:1--10:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3177886",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Flash memory is prevalent in modern servers and
devices. Coupled with the scaling down of flash
technology, the popularity of flash memory motivates
the search for methods to increase flash reliability
and lifetime. Erasures are the dominant cause of flash
cell wear, but reducing them is challenging because
flash is a write-once medium---memory cells must be
erased prior to writing. An approach that has recently
received considerable attention relies on write-once
memory (WOM) codes, designed to accommodate additional
writes on write-once media. However, the techniques
proposed for reusing flash pages with WOM codes are
limited in their scope. Many focus on the coding theory
alone, whereas others suggest FTL designs that are
application specific, or not applicable due to their
complexity, overheads, or specific constraints of
multilevel cell (MLC) flash. This work is the first
that addresses all aspects of page reuse within an
end-to-end analysis of a general-purpose FTL on MLC
flash. We use a hardware evaluation setup to directly
measure the short- and long-term effects of page reuse
on SSD durability and energy consumption, and show that
FTL design must explicitly take them into account. We
then provide a detailed analytical model for deriving
the optimal garbage collection policy for such FTL
designs, and for predicting the benefit from reuse on
realistic hardware and workload characteristics.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Ho:2018:SLP,
author = "Chien-Chung Ho and Yu-Ming Chang and Yuan-Hao Chang
and Tei-Wei Kuo",
title = "An {SLC}-Like Programming Scheme for {MLC} Flash
Memory",
journal = j-TOS,
volume = "14",
number = "1",
pages = "11:1--11:??",
month = apr,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3129257",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:48 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Although the multilevel cell (MLC) technique is widely
adopted by flash-memory vendors to boost the chip
density and lower the cost, it results in serious
performance and reliability problems. Different from
past work, a new cell programming method is proposed to
not only significantly improve chip performance but
also reduce the potential bit error rate. In
particular, a single-level cell (SLC)-like programming
scheme is proposed to better explore the
threshold-voltage relationship to denote different MLC
bit information, which in turn drastically provides a
larger window of threshold voltage similar to that
found in SLC chips. It could result in less programming
iterations and simultaneously a much less reliability
problem in programming flash-memory cells. In the
experiments, the new programming scheme could
accelerate the programming speed up to 742\% and even
reduce the bit error rate up to 471\% for MLC pages.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hu:2018:FMR,
author = "Xiameng Hu and Xiaolin Wang and Lan Zhou and Yingwei
Luo and Zhenlin Wang and Chen Ding and Chencheng Ye",
title = "Fast Miss Ratio Curve Modeling for Storage Cache",
journal = j-TOS,
volume = "14",
number = "2",
pages = "12:1--12:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3185751",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The reuse distance (least recently used (LRU) stack
distance) is an essential metric for performance
prediction and optimization of storage cache. Over the
past four decades, there have been steady improvements
in the algorithmic efficiency of reuse distance
measurement. This progress is accelerating in recent
years, both in theory and practical implementation. In
this article, we present a kinetic model of LRU cache
memory, based on the average eviction time (AET) of the
cached data. The AET model enables fast measurement and
use of low-cost sampling. It can produce the miss ratio
curve in linear time with extremely low space costs. On
storage trace benchmarks, AET reduces the time and
space costs compared to former techniques. Furthermore,
AET is a composable model that can characterize shared
cache behavior through sampling and modeling individual
programs or traces.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{ZhenJ:2018:CSN,
author = "Zhen ``Jason'' Sun and Geoff Kuenning and Sonam Mandal
and Philip Shilane and Vasily Tarasov and Nong Xiao and
Erez Zadok",
title = "Cluster and Single-Node Analysis of Long-Term
Deduplication Patterns",
journal = j-TOS,
volume = "14",
number = "2",
pages = "13:1--13:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3183890",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Deduplication has become essential in disk-based
backup systems, but there have been few long-term
studies of backup workloads. Most past studies either
were of a small static snapshot or covered only a short
period that was not representative of how a backup
system evolves over time. For this article, we first
collected 21 months of data from a shared user file
system; 33 users and over 4,000 snapshots are covered.
We then analyzed the dataset, examining a variety of
essential characteristics across two dimensions:
single-node deduplication and cluster deduplication.
For single-node deduplication analysis, our primary
focus was individual-user data. Despite apparently
similar roles and behavior among all of our users, we
found significant differences in their deduplication
ratios. Moreover, the data that some users share with
others had a much higher deduplication ratio than
average. For cluster deduplication analysis, we
implemented seven published data-routing algorithms and
created a detailed comparison of their performance with
respect to deduplication ratio, load distribution, and
communication overhead. We found that per-file routing
achieves a higher deduplication ratio than routing by
super-chunk (multiple consecutive chunks), but it also
leads to high data skew (imbalance of space usage
across nodes). We also found that large chunking sizes
are better for cluster deduplication, as they
significantly reduce data-routing overhead, while their
negative impact on deduplication ratios is small and
acceptable. We draw interesting conclusions from both
single-node and cluster deduplication analysis and make
recommendations for future deduplication systems
design.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhou:2018:EEE,
author = "Deng Zhou and Vania Fang and Tao Xie and Wen Pan and
Ram Kesavan and Tony Lin and Naresh Patel",
title = "Empirical Evaluation and Enhancement of Enterprise
Storage System Request Scheduling",
journal = j-TOS,
volume = "14",
number = "2",
pages = "14:1--14:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3193741",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Since little has been reported in the literature
concerning enterprise storage system file-level request
scheduling, we do not have enough knowledge about how
various scheduling factors affect performance.
Moreover, we are in lack of a good understanding on how
to enhance request scheduling to adapt to the changing
characteristics of workloads and hardware resources. To
answer these questions, we first build a request
scheduler prototype based on WAFL\reg, a mainstream
file system running on numerous enterprise storage
systems worldwide. Next, we use the prototype to
quantitatively measure the impact of various scheduling
configurations on performance on a NetApp\reg's
enterprise-class storage system. Several observations
have been made. For example, we discover that in order
to improve performance, the priority of write requests
and non-preempted restarted requests should be boosted
in some workloads. Inspired by these observations, we
further propose two scheduling enhancement heuristics
called SORD (size-oriented request dispatching) and
QATS (queue-depth aware time slicing). Finally, we
evaluate them by conducting a wide range of experiments
using workloads generated by SPC-1 and SFS2014 on both
HDD-based and all-flash platforms. Experimental results
show that the combination of the two can noticeably
reduce average request latency under some workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Teng:2018:LCD,
author = "Dejun Teng and Lei Guo and Rubao Lee and Feng Chen and
Yanfeng Zhang and Siyuan Ma and Xiaodong Zhang",
title = "A Low-cost Disk Solution Enabling {LSM}-tree to
Achieve High Performance for Mixed Read\slash Write
Workloads",
journal = j-TOS,
volume = "14",
number = "2",
pages = "15:1--15:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3162615",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "LSM-tree has been widely used in data management
production systems for write-intensive workloads.
However, as read and write workloads co-exist under
LSM-tree, data accesses can experience long latency and
low throughput due to the interferences to buffer
caching from the compaction, a major and frequent
operation in LSM-tree. After a compaction, the existing
data blocks are reorganized and written to other
locations on disks. As a result, the related data
blocks that have been loaded in the buffer cache are
invalidated since their referencing addresses are
changed, causing serious performance degradations. To
re-enable high-speed buffer caching during intensive
writes, we propose Log-Structured buffered-Merge tree
(simplified as LSbM-tree) by adding a compaction buffer
on disks to minimize the cache invalidations on buffer
cache caused by compactions. The compaction buffer
efficiently and adaptively maintains the frequently
visited datasets. In LSbM, strong locality objects can
be effectively kept in the buffer cache with minimum or
no harmful invalidations. With the help of a small
on-disk compaction buffer, LSbM achieves a high query
performance by enabling effective buffer caching, while
retaining all the merits of LSM-tree for
write-intensive data processing and providing high
bandwidth of disks for range queries. We have
implemented LSbM based on LevelDB. We show that with a
standard buffer cache and a hard disk, LSbM can achieve
2x performance improvement over LevelDB. We have also
compared LSbM with other existing solutions to show its
strong cache effectiveness.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Xiong:2018:CFG,
author = "Qin Xiong and Fei Wu and Zhonghai Lu and Yue Zhu and
You Zhou and Yibing Chu and Changsheng Xie and Ping
Huang",
title = "Characterizing {$3$D} Floating Gate {NAND} Flash:
Observations, Analyses, and Implications",
journal = j-TOS,
volume = "14",
number = "2",
pages = "16:1--16:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3162616",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "As both NAND flash memory manufacturers and users are
turning their attentions from planar architecture
towards three-dimensional (3D) architecture, it becomes
critical and urgent to understand the characteristics
of 3D NAND flash memory. These characteristics,
especially those different from planar NAND flash, can
significantly affect design choices of flash management
techniques. In this article, we present a
characterization study on the state-of-the-art 3D
floating gate (FG) NAND flash memory through
comprehensive experiments on an FPGA-based 3D NAND
flash evaluation platform. We make distinct
observations on its performance and reliability, such
as operation latencies and various error patterns,
followed by careful analyses from physical and
circuit-level perspectives. Although 3D FG NAND flash
provides much higher storage densities than planar NAND
flash, it faces new performance challenges of garbage
collection overhead and program performance variations
and more complicated reliability issues due to, e.g.,
distinct location dependence and value dependence of
errors. We also summarize the differences between 3D FG
NAND flash and planar NAND flash and discuss
implications on the designs of NAND flash management
techniques brought by the architecture innovation. We
believe that our work will facilitate developing novel
3D FG NAND flash-oriented designs to achieve better
performance and reliability.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yoo:2018:OOF,
author = "Jinsoo Yoo and Joontaek Oh and Seongjin Lee and Youjip
Won and Jin-Yong Ha and Jongsung Lee and Junseok Shim",
title = "{OrcFS}: Orchestrated File System for Flash Storage",
journal = j-TOS,
volume = "14",
number = "2",
pages = "17:1--17:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3162614",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "In this work, we develop the Orchestrated File System
(OrcFS) for Flash storage. OrcFS vertically integrates
the log-structured file system and the Flash-based
storage device to eliminate the redundancies across the
layers. A few modern file systems adopt sophisticated
append-only data structures in an effort to optimize
the behavior of the file system with respect to the
append-only nature of the Flash memory. While the
benefit of adopting an append-only data structure seems
fairly promising, it makes the stack of software layers
full of unnecessary redundancies, leaving substantial
room for improvement. The redundancies include (i)
redundant levels of indirection (address translation),
(ii) duplicate efforts to reclaim the invalid blocks
(i.e., segment cleaning in the file system and garbage
collection in the storage device), and (iii) excessive
over-provisioning (i.e., separate over-provisioning
areas in each layer). OrcFS eliminates these
redundancies via distributing the address translation,
segment cleaning (or garbage collection), bad block
management, and wear-leveling across the layers.
Existing solutions suffer from high segment cleaning
overhead and cause significant write amplification due
to mismatch between the file system block size and the
Flash page size. To optimize the I/O stack while
avoiding these problems, OrcFS adopts three key
technical elements. First, OrcFS uses disaggregate
mapping, whereby it partitions the Flash storage into
two areas, managed by a file system and Flash storage,
respectively, with different granularity. In OrcFS, the
metadata area and data area are maintained by 4Kbyte
page granularity and 256Mbyte superblock granularity.
The superblock-based storage management aligns the file
system section size, which is a unit of segment
cleaning, with the superblock size of the underlying
Flash storage. It can fully exploit the internal
parallelism of the underlying Flash storage, exploiting
the sequential workload characteristics of the
log-structured file system. Second, OrcFS adopts
quasi-preemptive segment cleaning to prohibit the
foreground I/O operation from being interfered with by
segment cleaning. The latency to reclaim the free space
can be prohibitive in OrcFS due to its large file
system section size, 256Mbyte. OrcFS effectively
addresses this issue via adopting a polling-based
segment cleaning scheme. Third, the OrcFS introduces
block patching to avoid unnecessary write amplification
in the partial page program. OrcFS is the enhancement
of the F2FS file system. We develop a prototype OrcFS
based on F2FS and server class SSD with modified
firmware (Samsung 843TN). OrcFS reduces the device
mapping table requirement to 1/465 and 1/4 compared
with the page mapping and the smallest mapping scheme
known to the public, respectively. Via eliminating the
redundancy in the segment cleaning and garbage
collection, the OrcFS reduces 1/3 of the write volume
under heavy random write workload. OrcFS achieves 56\%
performance gain against EXT4 in varmail workload.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "17",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Vef:2018:CST,
author = "Marc-Andr{\'e} Vef and Vasily Tarasov and Dean
Hildebrand and Andr{\'e} Brinkmann",
title = "Challenges and Solutions for Tracing Storage Systems:
a Case Study with Spectrum Scale",
journal = j-TOS,
volume = "14",
number = "2",
pages = "18:1--18:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3149376",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "IBM Spectrum Scale's parallel file system General
Parallel File System (GPFS) has a 20-year development
history with over 100 contributing developers. Its
ability to support strict POSIX semantics across more
than 10K clients leads to a complex design with
intricate interactions between the cluster nodes.
Tracing has proven to be a vital tool to understand the
behavior and the anomalies of such a complex software
product. However, the necessary trace information is
often buried in hundreds of gigabytes of by-product
trace records. Further, the overhead of tracing can
significantly impact running applications and file
system performance, limiting the use of tracing in a
production system. In this research article, we discuss
the evolution of the mature and highly scalable GPFS
tracing tool and present the exploratory study of GPFS'
new tracing interface, FlexTrace, which allows
developers and users to accurately specify what to
trace for the problem they are trying to solve. We
evaluate our methodology and prototype, demonstrating
that the proposed approach has negligible overhead,
even under intensive I/O workloads and with low-latency
storage devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "18",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Kashyap:2018:WCE,
author = "Anil Kashyap",
title = "Workload Characterization for Enterprise Disk Drives",
journal = j-TOS,
volume = "14",
number = "2",
pages = "19:1--19:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3151847",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The article presents an analysis of drive workloads
from enterprise storage systems. The drive workloads
are obtained from field return units from a
cross-section of enterprise storage system vendors and
thus provides a view of the workload characteristics
over a wide spectrum of end-user applications. The
workload parameters that have been characterized
include transfer lengths, access patterns, throughput,
and utilization. The study shows that reads are the
dominant workload accounting for 80\% of the accesses
to the drive. Writes are dominated by short block
random accesses while reads range from random to highly
sequential. A trend analysis over the period 2010--2014
shows that the workload has remained fairly constant
even as the capacities of the drives shipped has
steadily increased. The study shows that the data
stored on disk drives is relatively cold---on average
less than 4\% of the drive capacity is accessed in a
given 2h interval.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "19",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Agrawal:2018:ISI,
author = "Nitin Agrawal and Raju Rangaswami",
title = "Introduction to the Special Issue on {USENIX FAST
2018}",
journal = j-TOS,
volume = "14",
number = "3",
pages = "20:1--20:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3242152",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "20",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Alagappan:2018:PAR,
author = "Ramnatthan Alagappan and Aishwarya Ganesan and Eric
Lee and Aws Albarghouthi and Vijay Chidambaram and
Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau",
title = "Protocol-Aware Recovery for Consensus-Based
Distributed Storage",
journal = j-TOS,
volume = "14",
number = "3",
pages = "21:1--21:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3241062",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We introduce protocol-aware recovery (Par), a new
approach that exploits protocol-specific knowledge to
correctly recover from storage faults in distributed
systems. We demonstrate the efficacy of Par through the
design and implementation of corruption-tolerant
replication (Ctrl), a Par mechanism specific to
replicated state machine (RSM) systems. We
experimentally show that the Ctrl versions of two
systems, LogCabin and ZooKeeper, safely recover from
storage faults and provide high availability, while the
unmodified versions can lose data or become
unavailable. We also show that the Ctrl versions
achieve this reliability with little performance
overheads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "21",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhan:2018:EDM,
author = "Yang Zhan and Yizheng Jiao and Donald E. Porter and
Alex Conway and Eric Knorr and Martin Farach-Colton and
Michael A. Bender and Jun Yuan and William Jannen and
Rob Johnson",
title = "Efficient Directory Mutations in a Full-Path-Indexed
File System",
journal = j-TOS,
volume = "14",
number = "3",
pages = "22:1--22:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3241061",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Full-path indexing can improve I/O efficiency for
workloads that operate on data organized using
traditional, hierarchical directories, because data is
placed on persistent storage in scan order. Prior
results indicate, however, that renames in a local file
system with full-path indexing are prohibitively
expensive. This article shows how to use full-path
indexing in a file system to realize fast directory
scans, writes, and renames. The article introduces a
range-rename mechanism for efficient key-space changes
in a write-optimized dictionary. This mechanism is
encapsulated in the key-value Application Programming
Interface (API) and simplifies the overall file system
design. We implemented this mechanism in
B$^{\epsilon}$-trees File System (BetrFS), an in-kernel,
local file system for Linux. This new version, BetrFS
0.4, performs recursive greps 1.5x faster and random
writes 1.2x faster than BetrFS 0.3, but renames are
competitive with indirection-based file systems for a
range of sizes. BetrFS 0.4 outperforms BetrFS 0.3, as
well as traditional file systems, such as ext4, Extents
File System (XFS), and Z File System (ZFS), across a
variety of workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "22",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Gunawi:2018:FSS,
author = "Haryadi S. Gunawi and Riza O. Suminto and Russell
Sears and Casey Golliher and Swaminathan Sundararaman
and Xing Lin and Tim Emami and Weiguang Sheng and
Nematollah Bidokhti and Caitie McCaffrey and Deepthi
Srinivasan and Biswaranjan Panda and Andrew Baptist and
Gary Grider and Parks M. Fields and Kevin Harms and
Robert B. Ross and Andree Jacobson and Robert Ricci and
Kirk Webb and Peter Alvaro and H. Birali Runesha and
Mingzhe Hao and Huaicheng Li",
title = "Fail-Slow at Scale: Evidence of Hardware Performance
Faults in Large Production Systems",
journal = j-TOS,
volume = "14",
number = "3",
pages = "23:1--23:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3242086",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Fail-slow hardware is an under-studied failure mode.
We present a study of 114 reports of fail-slow hardware
incidents, collected from large-scale cluster
deployments in 14 institutions. We show that all
hardware types such as disk, SSD, CPU, memory, and
network components can exhibit performance faults. We
made several important observations such as faults
convert from one form to another, the cascading root
causes and impacts can be long, and fail-slow faults
can have varying symptoms. From this study, we make
suggestions to vendors, operators, and systems
designers.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "23",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Won:2018:BOC,
author = "Youjip Won and Joontaek Oh and Jaemin Jung and
Gyeongyeol Choi and Seongbae Son and Jooyoung Hwang and
Sangyeun Cho",
title = "Bringing Order to Chaos: Barrier-Enabled {I/O} Stack
for Flash Storage",
journal = j-TOS,
volume = "14",
number = "3",
pages = "24:1--24:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3242091",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "This work is dedicated to eliminating the overhead
required for guaranteeing the storage order in the
modern IO stack. The existing block device adopts a
prohibitively expensive approach in ensuring the
storage order among write requests: interleaving the
write requests with Transfer-and-Flush. For exploiting
the cache barrier command for flash storage, we
overhaul the IO scheduler, the dispatch module, and the
filesystem so that these layers are orchestrated to
preserve the ordering condition imposed by the
application with which the associated data blocks are
made durable. The key ingredients of Barrier-Enabled IO
stack are Epoch-based IO scheduling, Order-Preserving
Dispatch, and Dual-Mode Journaling. Barrier-enabled IO
stack can control the storage order without
Transfer-and-Flush overhead. We implement the
barrier-enabled IO stack in server as well as in mobile
platforms. SQLite performance increases by 270\% and
75\%, in server and in smartphone, respectively. In a
server storage, BarrierFS brings as much as by 43$
\times $ and by 73$ \times $ performance gain in MySQL
and SQLite, respectively, against EXT4 via relaxing the
durability of a transaction.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "24",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lee:2018:MCM,
author = "Minho Lee and Dong Hyun Kang and Young Ik Eom",
title = "{M-CLOCK}: Migration-optimized Page Replacement
Algorithm for Hybrid Memory Architecture",
journal = j-TOS,
volume = "14",
number = "3",
pages = "25:1--25:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3216730",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Phase Change Memory (PCM) has drawn great attention as
a main memory due to its attractive characteristics
such as non-volatility, byte-addressability, and
in-place update. However, since the capacity of PCM is
not fully mature yet, hybrid memory architecture that
consists of DRAM and PCM has been suggested as a main
memory. In addition, page replacement algorithm based
on hybrid memory architecture is actively being
studied, because existing page replacement algorithms
cannot be used on hybrid memory architecture in that
they do not consider the two weaknesses of PCM: high
write latency and low endurance. In this article, to
mitigate the above hardware limitations of PCM, we
revisit the page cache layer for the hybrid memory
architecture and propose a novel page replacement
algorithm, called M-CLOCK, to improve the performance
of hybrid memory architecture and the lifespan of PCM.
In particular, M-CLOCK aims to reduce the number of PCM
writes that negatively affect the performance of hybrid
memory architecture. Experimental results clearly show
that M-CLOCK outperforms the state-of-the-art page
replacement algorithms in terms of the number of PCM
writes and effective memory access time by up to 98\%
and 9.4 times, respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "25",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Shen:2018:DID,
author = "Zhaoyan Shen and Feng Chen and Yichen Jia and Zili
Shao",
title = "{DIDACache}: an Integration of Device and Application
for Flash-based Key-value Caching",
journal = j-TOS,
volume = "14",
number = "3",
pages = "26:1--26:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3203410",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Key-value caching is crucial to today's low-latency
Internet services. Conventional key-value cache
systems, such as Memcached, heavily rely on expensive
DRAM memory. To lower Total Cost of Ownership, the
industry recently is moving toward more cost-efficient
flash-based solutions, such as Facebook's McDipper [14]
and Twitter's Fatcache [56]. These cache systems
typically take commercial SSDs and adopt a
Memcached-like scheme to store and manage key-value
cache data in flash. Such a practice, though simple, is
inefficient due to the huge semantic gap between the
key-value cache manager and the underlying flash
devices. In this article, we advocate to reconsider the
cache system design and directly open device-level
details of the underlying flash storage for key-value
caching. We propose an enhanced flash-aware key-value
cache manager, which consists of a novel unified
address mapping module, an integrated garbage
collection policy, a dynamic over-provisioning space
management, and a customized wear-leveling policy, to
directly drive the flash management. A thin
intermediate library layer provides a slab-based
abstraction of low-level flash memory space and an API
interface for directly and easily operating flash
devices. A special flash memory SSD hardware that
exposes flash physical details is adopted to store
key-value items. This co-design approach bridges the
semantic gap and well connects the two layers together,
which allows us to leverage both the domain knowledge
of key-value caches and the unique device properties.
In this way, we can maximize the efficiency of
key-value caching on flash devices while minimizing its
weakness. We implemented a prototype, called DIDACache,
based on the Open-Channel SSD platform. Our experiments
on real hardware show that we can significantly
increase the throughput by 35.5\%, reduce the latency
by 23.6\%, and remove unnecessary erase operations by
28\%.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "26",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Luo:2018:WER,
author = "Huizhang Luo and Qing Liu and Jingtong Hu and Qiao Li
and Liang Shi and Qingfeng Zhuge and Edwin H.-M. Sha",
title = "Write Energy Reduction for {PCM} via Pumping
Efficiency Improvement",
journal = j-TOS,
volume = "14",
number = "3",
pages = "27:1--27:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3200139",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The emerging Phase Change Memory (PCM) is considered
to be a promising candidate to replace DRAM as the next
generation main memory due to its higher scalability
and lower leakage power. However, the high write power
consumption has become a major challenge in adopting
PCM as main memory. In addition to the fact that
writing to PCM cells requires high write current and
voltage, current loss in the charge pumps also
contributes a large percentage of high power
consumption. The pumping efficiency of a PCM chip is a
concave function of the write current. Leveraging the
characteristics of the concave function, the overall
pumping efficiency can be improved if the write current
is uniform. In this article, we propose a
peak-to-average (PTA) write scheme, which smooths the
write current fluctuation by regrouping write units. In
particular, we calculate the current requirements for
each write unit by their values when they are evicted
from the last level cache (LLC). When the write units
are waiting in the memory controller, we regroup the
write units by LLC-assisted PTA to reach the
current-uniform goal. Experimental results show that
LLC-assisted PTA achieved 13.4\% of overall energy
saving compared to the baseline.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "27",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yan:2018:RRB,
author = "Wenrui Yan and Jie Yao and Qiang Cao and Changsheng
Xie and Hong Jiang",
title = "{ROS}: a Rack-based Optical Storage System with Inline
Accessibility for Long-Term Data Preservation",
journal = j-TOS,
volume = "14",
number = "3",
pages = "28:1--28:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3231599",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "The combination of the explosive growth in digital
data and the demand to preserve much of these data in
the long term has made it imperative to find a more
cost-effective way than HDD arrays and a more easily
accessible way than tape libraries to store massive
amounts of data. While modern optical discs are capable
of guaranteeing more than 50-year data preservation
without media replacement, individual optical discs'
lack of the performance and capacity relative to HDDs
or tapes has significantly limited their use in
datacenters. This article presents a Rack-scale Optical
disc library System, or ROS in short, which provides a
PB-level total capacity and inline accessibility on
thousands of optical discs built within a 42U Rack. A
rotatable roller and robotic arm separating and
fetching discs are designed to improve disc placement
density and simplify the mechanical structure. A
hierarchical storage system based on SSDs, hard disks,
and optical discs is proposed to effectively hide the
delay of mechanical operation. However, an optical
library file system (OLFS) based on FUSE is proposed to
schedule mechanical operation and organize data on the
tiered storage with a POSIX user interface to provide
an illusion of inline data accessibility. We further
optimize OLFS by reducing unnecessary user/kernel
context switches inheriting from legacy FUSE framework.
We evaluate ROS on a few key performance metrics,
including operation delays of the mechanical structure
and software overhead in a prototype PB-level ROS
system. The results show that ROS stacked on Samba and
FUSE as network-attached storage (NAS) mode almost
saturates the throughput provided by underlying samba
via 10GbE network for external users, as well as in
this scenario provides about 53ms file write and 15ms
read latency, exhibiting its inline accessibility.
Besides, ROS is able to effectively hide and virtualize
internal complex operational behaviors and be easily
deployable in datacenters.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "28",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Desnoyers:2018:ISI,
author = "Peter Desnoyers and Eyal de Lara",
title = "Introduction to the Special Issue on {SYSTOR 2017}",
journal = j-TOS,
volume = "14",
number = "4",
pages = "29:1--29:??",
month = dec,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3287097",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "29",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Trivedi:2018:FFN,
author = "Animesh Trivedi and Nikolas Ioannou and Bernard
Metzler and Patrick Stuedi and Jonas Pfefferle and
Kornilios Kourtis and Ioannis Koltsidas and Thomas R.
Gross",
title = "{FlashNet}: Flash\slash Network Stack Co-Design",
journal = j-TOS,
volume = "14",
number = "4",
pages = "30:1--30:??",
month = dec,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3239562",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "During the past decade, network and storage devices
have undergone rapid performance improvements,
delivering ultra-low latency and several Gbps of
bandwidth. Nevertheless, current network and storage
stacks fail to deliver this hardware performance to the
applications, often due to the loss of I/O efficiency
from stalled CPU performance. While many efforts
attempt to address this issue solely on either the
network or the storage stack, achieving
high-performance for networked-storage applications
requires a holistic approach that considers both. In
this article, we present FlashNet, a software I/O stack
that unifies high-performance network properties with
flash storage access and management. FlashNet builds on
RDMA principles and abstractions to provide a direct,
asynchronous, end-to-end data path between a client and
remote flash storage. The key insight behind FlashNet
is to co-design the stack's components (an RDMA
controller, a flash controller, and a file system) to
enable cross-stack optimizations and maximize I/O
efficiency. In micro-benchmarks, FlashNet improves 4kB
network I/O operations per second (IOPS) by 38.6\% to
1.22M, decreases access latency by 43.5\% to 50.4 $
\mu $s, and prolongs the flash lifetime by 1.6--5.9$
\times $ for writes. We illustrate the capabilities of
FlashNet by building a Key-Value store and porting a
distributed data store that uses RDMA on it. The use of
FlashNet's RDMA API improves the performance of KV
store by $ 2 \times $ and requires minimum changes for
the ported data store to access remote flash devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "30",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Guz:2018:PCN,
author = "Zvika Guz and Harry (Huan) Li and Anahita Shayesteh
and Vijay Balakrishnan",
title = "Performance Characterization of {NVMe}-over-Fabrics
Storage Disaggregation",
journal = j-TOS,
volume = "14",
number = "4",
pages = "31:1--31:??",
month = dec,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3239563",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Storage disaggregation separates compute and storage
to different nodes to allow for independent resource
scaling and, thus, better hardware resource
utilization. While disaggregation of hard-drives
storage is a common practice, NVMe-SSD (i.e.,
PCIe-based SSD) disaggregation is considered more
challenging. This is because SSDs are significantly
faster than hard drives, so the latency overheads (due
to both network and CPU processing) as well as the
extra compute cycles needed for the offloading stack
become much more pronounced. In this work, we
characterize the overheads of NVMe-SSD disaggregation.
We show that NVMe-over-Fabrics (NVMe-oF)---a recently
released remote storage protocol specification---reduces
the overheads of remote access to a bare minimum, thus
greatly increasing the cost-efficiency of Flash
disaggregation. Specifically, while recent work showed
that SSD storage disaggregation via iSCSI degrades
application-level throughput by 20\%, we report on
negligible performance degradation with NVMe-oF---both
when using stress-tests as well as with a
more-realistic KV-store workload.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "31",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Xie:2018:EIP,
author = "Wei Xie and Yong Chen and Philip C. Roth",
title = "Exploiting Internal Parallelism for Address
Translation in Solid-State Drives",
journal = j-TOS,
volume = "14",
number = "4",
pages = "32:1--32:??",
month = dec,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3239564",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Solid-state Drives (SSDs) have changed the landscape
of storage systems and present a promising storage
solution for data-intensive applications due to their
low latency, high bandwidth, and low power consumption
compared to traditional hard disk drives. SSDs achieve
these desirable characteristics using internal
parallelism---parallel access to multiple internal flash
memory chips---and a Flash Translation Layer (FTL) that
determines where data are stored on those chips so that
they do not wear out prematurely. However, current
state-of-the-art cache-based FTLs like the Demand-based
Flash Translation Layer (DFTL) do not allow IO
schedulers to take full advantage of internal
parallelism, because they impose a tight coupling
between the logical-to-physical address translation and
the data access. To address this limitation, we
introduce a new FTL design called Parallel-DFTL that
works with the DFTL to decouple address translation
operations from data accesses. Parallel-DFTL separates
address translation and data access operations into
different queues, allowing the SSD to use concurrent
flash accesses for both types of operations. We also
present a Parallel-LRU cache replacement algorithm to
improve the concurrency of address translation
operations. To compare Parallel-DFTL against existing
FTL approaches, we present a Parallel-DFTL performance
model and compare its predictions against those for
DFTL and an ideal page-mapping approach. We also
implemented the Parallel-DFTL approach in an SSD
simulator using real device parameters, and used
trace-driven simulation to evaluate Parallel-DFTL's
efficacy. Our evaluation results show that
Parallel-DFTL improved the overall performance by up to
32\% for the real IO workloads we tested, and by up to
two orders of magnitude with synthetic test workloads.
We also found that Parallel-DFTL is able to achieve
reasonable performance with a very small cache size and
that it provides the best benefit for those workloads
with large request size or with high write ratio.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "32",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Pletka:2018:MNG,
author = "Roman Pletka and Ioannis Koltsidas and Nikolas Ioannou
and Sasa Tomi{\'c} and Nikolaos Papandreou and Thomas
Parnell and Haralampos Pozidis and Aaron Fry and Tim
Fisher",
title = "Management of Next-Generation {NAND} Flash to Achieve
Enterprise-Level Endurance and Latency Targets",
journal = j-TOS,
volume = "14",
number = "4",
pages = "33:1--33:??",
month = dec,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3241060",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Despite its widespread use in consumer devices and
enterprise storage systems, NAND flash faces a growing
number of challenges. While technology advances have
helped to increase the storage density and reduce
costs, they have also led to reduced endurance and
larger block variations, which cannot be compensated
solely by stronger ECC or read-retry schemes but have
to be addressed holistically. Our goal is to enable
low-cost NAND flash in enterprise storage for cost
efficiency. We present novel flash-management
approaches that reduce write amplification, achieve
better wear leveling, and enhance endurance without
sacrificing performance. We introduce block
calibration, a technique to determine optimal
read-threshold voltage levels that minimize error
rates, and novel garbage-collection as well as
data-placement schemes that alleviate the effects of
block health variability and show how these techniques
complement one another and thereby achieve enterprise
storage requirements. By combining the proposed
schemes, we improve endurance by up to 15$ \times $
compared to the baseline endurance of NAND flash
without using a stronger ECC scheme. The
flash-management algorithms presented herein were
designed and implemented in simulators, hardware test
platforms, and eventually in the flash controllers of
production enterprise all-flash arrays. Their
effectiveness has been validated across thousands of
customer deployments since 2015.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "33",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Marmol:2018:LSA,
author = "Leonardo Marmol and Mohammad Chowdhury and Raju
Rangaswami",
title = "{LibPM}: Simplifying Application Usage of Persistent
Memory",
journal = j-TOS,
volume = "14",
number = "4",
pages = "34:1--34:??",
month = dec,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3278141",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "Persistent Memory devices present properties that are
uniquely different from prior technologies for which
applications have been built. Unfortunately, the
conventional approach to building applications fail to
either efficiently utilize these new devices or provide
programmers a seamless development experience. We have
built LibPM, a Persistent Memory Library that
implements an easy-to-use container abstraction for
consuming PM. LibPM's containers are data hosting units
that can store arbitrarily complex data types while
preserving their integrity and consistency.
Consequently, LibPM's containers provide a generic
interface to applications, allowing applications to
store and manipulate arbitrarily structured data with
strong durability and consistency properties, all
without having to navigate all the myriad pitfalls of
programming PM directly. By providing a simple and
high-performing transactional update mechanism, LibPM
allows applications to manipulate persistent data at
the speed of memory. The container abstraction and
automatic persistent data discovery mechanisms within
LibPM also simplify porting legacy applications to PM.
From a performance perspective, LibPM closely matches
and often exceeds the performance of state-of-the-art
application libraries for PM. For instance, LibPM's
performance is 195$ \times $ better for write intensive
workloads and 2.6$ \times $ better for read intensive
workloads when compared with the state-of-the-art
Pmem.IO persistent memory library.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "34",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Gatla:2018:TRF,
author = "Om Rameshwar Gatla and Mai Zheng and Muhammad Hameed
and Viacheslav Dubeyko and Adam Manzanares and Filip
Blagojevic and Cyril Guyot and Robert Mateescu",
title = "Towards Robust File System Checkers",
journal = j-TOS,
volume = "14",
number = "4",
pages = "35:1--35:??",
month = dec,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3281031",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "File systems may become corrupted for many reasons
despite various protection techniques. Therefore, most
file systems come with a checker to recover the file
system to a consistent state. However, existing
checkers are commonly assumed to be able to complete
the repair without interruption, which may not be true
in practice. In this work, we demonstrate via fault
injection experiments that checkers of widely used file
systems (EXT4, XFS, BtrFS, and F2FS) may leave the file
system in an uncorrectable state if the repair
procedure is interrupted unexpectedly. To address the
problem, we first fix the ordering issue in the undo
logging of e2fsck and then build a general logging
library (i.e., rfsck-lib) for strengthening checkers.
To demonstrate the practicality, we integrate rfsck-lib
with existing checkers and create two new checkers:
rfsck-ext, a robust checker for Ext-family file
systems, and rfsck-xfs, a robust checker for XFS file
systems, both of which require only tens of lines of
modification to the original versions. Both rfsck-ext
and rfsck-xfs are resilient to faults in our
experiments. Also, both checkers incur reasonable
performance overhead (i.e., up to 12\%) compared to the
original unreliable versions. Moreover, rfsck-ext
outperforms the patched e2fsck by up to nine times
while achieving the same level of robustness.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "35",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Choi:2018:HFC,
author = "Jin-Yong Choi and Eyee Hyun Nam and Yoon Jae Seong and
Jin Hyuk Yoon and Sookwan Lee and Hong Seok Kim and
Jeongsu Park and Yeong-Jae Woo and Sheayun Lee and Sang
Lyul Min",
title = "{HIL}: a Framework for Compositional {FTL} Development
and Provably-Correct Crash Recovery",
journal = j-TOS,
volume = "14",
number = "4",
pages = "36:1--36:??",
month = dec,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3281030",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:49 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
abstract = "We present a framework called Hierarchically
Interacting Logs (HIL) for constructing Flash
Translation Layers (FTLs). The main goal of the HIL
framework is to heal the Achilles heel---the crash
recovery---of FTLs (hence, its name). Nonetheless, the
framework itself is general enough to encompass not
only block-mapped and page-mapped FTLs but also many of
their variants, including hybrid ones, because of its
compositional nature. Crash recovery within the HIL
framework proceeds in two phases: structural recovery
and functional recovery. During the structural
recovery, residual effects due to program operations
ongoing at the time of the crash are eliminated in an
atomic manner using shadow paging. During the
functional recovery, operations that would have been
performed if there had been no crash are replayed in a
redo-only fashion. Both phases operate in an idempotent
manner, preventing repeated crashes during recovery
from causing any additional problems. We demonstrate
the practicality of the proposed HIL framework by
implementing a prototype and showing that its
performance during normal execution and also during
crash recovery is at least as good as those of
state-of-the-art SSDs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "36",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{May:2019:LF,
author = "Michael J. May and Etamar Laron and Khalid Zoabi and
Havah Gerhardt",
title = "On the Lifecycle of the File",
journal = j-TOS,
volume = "15",
number = "1",
pages = "1:1--1:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3295463",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3295463",
abstract = "Users and Operating Systems (OSs) have vastly
different views of files. OSs use files to persist data
and structured information. To accomplish this, OSs
treat files as named collections of bytes managed in
hierarchical file systems. Despite their critical role
in computing, little attention is paid to the lifecycle
of the file, the evolution of file contents, or the
evolution of file metadata. In contrast, users have
rich mental models of files: they group files into
projects, send data repositories to others, work on
documents over time, and stash them aside for future
use. Current OSs and Revision Control Systems ignore
such mental models, persisting a selective, manually
designated history of revisions. Preserving the mental
model allows applications to better match how users
view their files, making file processing and archiving
tools more effective. We propose two mechanisms that
OSs can adopt to better preserve the mental model: File
Lifecycle Events (FLEs) that record a file's
progression and Complex File Events (CFEs) that combine
them into meaningful patterns. We present the Complex
File Events Engine (CoFEE), which uses file system
monitoring and an extensible rulebase (Drools) to
detect FLEs and convert them into complex ones. CFEs
are persisted in NoSQL stores for later querying.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Noh:2019:ATD,
author = "Sam H. Noh",
title = "{ACM TOS} Distinguished Reviewers",
journal = j-TOS,
volume = "15",
number = "1",
pages = "1e:1--1e:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3313879",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3313879",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1e",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Luby:2019:LCS,
author = "Michael Luby and Roberto Padovani and Thomas J.
Richardson and Lorenz Minder and Pooja Aggarwal",
title = "Liquid Cloud Storage",
journal = j-TOS,
volume = "15",
number = "1",
pages = "2:1--2:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3281276",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3281276",
abstract = "A liquid system provides durable object storage based
on spreading redundantly generated data across a
network of hundreds to thousands of potentially
unreliable storage nodes. A liquid system uses a
combination of a large code, lazy repair, and flow
storage organization. We show that a liquid system can
be operated to enable flexible and essentially optimal
combinations of storage durability, storage overhead,
repair bandwidth usage, and access performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhang:2019:LGF,
author = "Yiming Zhang and Dongsheng Li and Ling Liu",
title = "Leveraging Glocality for Fast Failure Recovery in
Distributed {RAM} Storage",
journal = j-TOS,
volume = "15",
number = "1",
pages = "3:1--3:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3289604",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3289604",
abstract = "Distributed RAM storage aggregates the RAM of servers
in data center networks (DCN) to provide extremely high
I/O performance for large-scale cloud systems. For
quick recovery of storage server failures, MemCube [53]
exploits the proximity of the BCube network to limit
the recovery traffic to the recovery servers' 1-hop
neighborhood. However, the previous design is
applicable only to the symmetric BCube($ n $, $ k $) network
with $ n^{k + 1} $ nodes and has suboptimal recovery
performance due to congestion and contention. To
address these problems, in this article, we propose
CubeX, which (i) generalizes the ``1-hop'' principle of
MemCube for arbitrary cube-based networks and (ii)
improves the throughput and recovery performance of
RAM-based key-value (KV) store via cross-layer
optimizations. At the core of CubeX is to leverage the
glocality (= globality + locality) of cube-based
networks: It scatters backup data across a large number
of disks globally distributed throughout the cube and
restricts all recovery traffic within the small local
range of each server node. Our evaluation shows that
CubeX not only efficiently supports RAM-based KV store
for cube-based networks but also significantly
outperforms MemCube and RAMCloud in both throughput and
recovery time.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Cao:2019:TTA,
author = "Zhichao Cao and Hao Wen and Xiongzi Ge and Jingwei Ma
and Jim Diehl and David H. C. Du",
title = "{TDDFS}: a Tier-Aware Data Deduplication-Based File
System",
journal = j-TOS,
volume = "15",
number = "1",
pages = "4:1--4:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3295461",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3295461",
abstract = "With the rapid increase in the amount of data produced
and the development of new types of storage devices,
storage tiering continues to be a popular way to
achieve a good tradeoff between performance and
cost-effectiveness. In a basic two-tier storage system,
a storage tier with higher performance and typically
higher cost (the fast tier) is used to store
frequently-accessed (active) data while a large amount
of less-active data are stored in the lower-performance
and low-cost tier (the slow tier). Data are migrated
between these two tiers according to their activity. In
this article, we propose a Tier-aware Data
Deduplication-based File System, called TDDFS, which
can operate efficiently on top of a two-tier storage
environment. Specifically, to achieve better
performance, nearly all file operations are performed
in the fast tier. To achieve higher cost-effectiveness,
files are migrated from the fast tier to the slow tier
if they are no longer active, and this migration is
done with data deduplication. The distinctiveness of
our design is that it maintains the non-redundant
(unique) chunks produced by data deduplication in both
tiers if possible. When a file is reloaded (called a
reloaded file) from the slow tier to the fast tier, if
some data chunks of the file already exist in the fast
tier, then the data migration of these chunks from the
slow tier can be avoided. Our evaluation shows that
TDDFS achieves close to the best overall performance
among various file-tiering designs for two-tier storage
systems.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Yadgar:2019:ISI,
author = "Gala Yadgar and Donald E. Porter",
title = "Introduction to the Special Issue on {ACM
International Systems and Storage Conference (SYSTOR)
2018}",
journal = j-TOS,
volume = "15",
number = "1",
pages = "5:1--5:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3313898",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3313898",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Saad:2019:LPD,
author = "Mohamed M. Saad and Roberto Palmieri and Binoy
Ravindran",
title = "{Lerna}: Parallelizing Dependent Loops Using
Speculation",
journal = j-TOS,
volume = "15",
number = "1",
pages = "6:1--6:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3310368",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3310368",
abstract = "We present Lerna, an end-to-end tool that
automatically and transparently detects and extracts
parallelism from data-dependent sequential loops. Lerna
uses speculation combined with a set of techniques
including code profiling, dependency analysis,
instrumentation, and adaptive execution. Speculation is
needed to avoid conservative actions and detect actual
conflicts. Lerna targets applications that are
hard-to-parallelize due to data dependency. Our
experimental study involves the parallelization of 13
applications with data dependencies. Results on a
24-core machine show an average of 2.7$ \times $
speedup for micro-benchmarks and 2.5$ \times $ for the
macro-benchmarks.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Pei:2019:RPU,
author = "Shuyi Pei and Jing Yang and Qing Yang",
title = "{REGISTOR}: a Platform for Unstructured Data
Processing Inside {SSD} Storage",
journal = j-TOS,
volume = "15",
number = "1",
pages = "7:1--7:??",
month = apr,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3310149",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3310149",
abstract = "This article presents REGISTOR, a platform for regular
expression grabbing inside storage. The main idea of
Registor is accelerating regular expression (regex)
search inside storage where large data set is stored,
eliminating the I/O bottleneck problem. A special
hardware engine for regex search is designed and
augmented inside a flash SSD that processes data
on-the-fly during data transmission from NAND flash to
host. To make the speed of regex search match the
internal bus speed of a modern SSD, a deep pipeline
structure is designed in Registor hardware consisting
of a file semantics extractor, matching candidates
finder, regex matching units (REMUs), and results
organizer. Furthermore, each stage of the pipeline
makes the use of maximal parallelism possible. To make
Registor readily usable by high-level applications, we
have developed a set of APIs and libraries in Linux
allowing Registor to process files in the SSD by
recombining separate data blocks into files
efficiently. A working prototype of Registor has been
built in our newly designed NVMe-SSD. Extensive
experiments and analyses have been carried out to show
that Registor achieves high throughput, reduces the I/O
bandwidth requirement by up to 97\%, and reduces CPU
utilization by as much as 82\% for regex search in
large datasets.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Gunawi:2019:ISS,
author = "Haryadi Gunawi and Benjamin Reed",
title = "Introduction to the Special Section on the 2018
{USENIX} Annual Technical Conference {(ATC'18)}",
journal = j-TOS,
volume = "15",
number = "2",
pages = "8:1--8:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3322100",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3322100",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Hu:2019:TLF,
author = "Yige Hu and Zhiting Zhu and Ian Neal and Youngjin Kwon
and Tianyu Cheng and Vijay Chidambaram and Emmett
Witchel",
title = "{TxFS}: Leveraging File-system Crash Consistency to
Provide {ACID} Transactions",
journal = j-TOS,
volume = "15",
number = "2",
pages = "9:1--9:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3318159",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3318159",
abstract = "We introduce TxFS, a transactional file system that
builds upon a file system's atomic-update mechanism
such as journaling. Though prior work has explored a
number of transactional file systems, TxFS has a unique
set of properties: a simple API, portability across
different hardware, high performance, low complexity
(by building on the file-system journal), and full ACID
transactions. We port SQLite, OpenLDAP, and Git to use
TxFS and experimentally show that TxFS provides strong
crash consistency while providing equal or better
performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhang:2019:CDS,
author = "Yu Zhang and Jin Zhao and Xiaofei Liao and Hai Jin and
Lin Gu and Haikun Liu and Bingsheng He and Ligang He",
title = "{CGraph}: a Distributed Storage and Processing System
for Concurrent Iterative Graph Analysis Jobs",
journal = j-TOS,
volume = "15",
number = "2",
pages = "10:1--10:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3319406",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3319406",
abstract = "Distributed graph processing platforms usually need to
handle massive Concurrent iterative Graph Processing
(CGP) jobs for different purposes. However, existing
distributed systems face high ratio of data access cost
to computation for the CGP jobs, which incurs low
throughput. We observed that there are strong spatial
and temporal correlations among the data accesses
issued by different CGP jobs, because these
concurrently running jobs usually need to repeatedly
traverse the shared graph structure for the iterative
processing of each vertex. Based on this observation,
this article proposes a distributed storage and
processing system CGraph for the CGP jobs to
efficiently handle the underlying static/evolving graph
for high throughput. It uses a data-centric
load-trigger-pushing model, together with several
optimizations, to enable the CGP jobs to efficiently
share the graph structure data in the cache/memory and
their accesses by fully exploiting such correlations,
where the graph structure data is decoupled from the
vertex state associated with each job. It can deliver
much higher throughput for the CGP jobs by effectively
reducing their average ratio of data access cost to
computation. Experimental results show that CGraph
improves the throughput of the CGP jobs by up to 3.47$
\times $ in comparison with existing solutions on
distributed platforms.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zhu:2019:STS,
author = "Tao Zhu and Zhuoyue Zhao and Feifei Li and Weining
Qian and Aoying Zhou and Dong Xie and Ryan Stutsman and
Haining Li and Huiqi Hu",
title = "{SolarDB}: Toward a Shared-Everything Database on
Distributed Log-Structured Storage",
journal = j-TOS,
volume = "15",
number = "2",
pages = "11:1--11:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3318158",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3318158",
abstract = "Efficient transaction processing over large databases
is a key requirement for many mission-critical
applications. Although modern databases have achieved
good performance through horizontal partitioning, their
performance deteriorates when cross-partition
distributed transactions have to be executed. This
article presents SolarDB, a distributed relational
database system that has been successfully tested at a
large commercial bank. The key features of SolarDB
include (1) a shared-everything architecture based on a
two-layer log-structured merge-tree; (2) a new
concurrency control algorithm that works with the
log-structured storage, which ensures efficient and
non-blocking transaction processing even when the
storage layer is compacting data among nodes in the
background; and (3) fine-grained data access to
effectively minimize and balance network communication
within the cluster. According to our empirical
evaluations on TPC-C, Smallbank, and a real-world
workload, SolarDB outperforms the existing
shared-nothing systems by up to 50$ \times $ when there are
close to or more than 5\% distributed transactions.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Arpaci-Dusseau:2019:ISS,
author = "Andrea Arpaci-Dusseau and Geoffrey M. Voelker",
title = "Introduction to the Special Section on {OSDI'18}",
journal = j-TOS,
volume = "15",
number = "2",
pages = "12:1--12:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3322101",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3322101",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Zuo:2019:LHH,
author = "Pengfei Zuo and Yu Hua and Jie Wu",
title = "Level Hashing: a High-performance and
Flexible-resizing Persistent Hashing Index Structure",
journal = j-TOS,
volume = "15",
number = "2",
pages = "13:1--13:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3322096",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3322096",
abstract = "Non-volatile memory (NVM) technologies as persistent
memory are promising candidates to complement or
replace DRAM for building future memory systems, due to
having the advantages of high density, low power, and
non-volatility. In main memory systems, hashing index
structures are fundamental building blocks to provide
fast query responses. However, hashing index structures
originally designed for dynamic random access memory
(DRAM) become inefficient for persistent memory due to
new challenges including hardware limitations of NVM
and the requirement of data consistency. To address
these challenges, this article proposes level hashing,
a write-optimized and high-performance hashing index
scheme with low-overhead consistency guarantee and
cost-efficient resizing. Level hashing provides a
sharing-based two-level hash table, which achieves
constant-scale worst-case time complexity for search,
insertion, deletion, and update operations, and rarely
incurs extra NVM writes. To guarantee the consistency
with low overhead, level hashing leverages log-free
consistency schemes for deletion, insertion, and
resizing operations, and an opportunistic log-free
scheme for update operation. To cost-efficiently resize
this hash table, level hashing leverages an in-place
resizing scheme that only needs to rehash 1/3 of
buckets instead of the entire table to expand a hash
table and rehash 2/3 of buckets to shrink a hash table,
thus significantly improving the resizing performance
and reducing the number of rehashed buckets. Extensive
experimental results show that the level hashing speeds
up insertions by 1.4$ \times $--3.0$ \times $, updates
by 1.2$ \times $--2.1$ \times $, expanding by over 4.3$
\times $, and shrinking by over 1.4$ \times $ while
maintaining high search and deletion performance
compared with state-of-the-art hashing schemes.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Mohan:2019:CAS,
author = "Jayashree Mohan and Ashlie Martinez and Soujanya
Ponnapalli and Pandian Raju and Vijay Chidambaram",
title = "{CrashMonkey} and {ACE}: Systematically Testing
File-System Crash Consistency",
journal = j-TOS,
volume = "15",
number = "2",
pages = "14:1--14:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3320275",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3320275",
abstract = "We present CrashMonkey and Ace, a set of tools to
systematically find crash-consistency bugs in Linux
file systems. CrashMonkey is a record-and-replay
framework which tests a given workload on the target
file system by simulating power-loss crashes while the
workload is being executed, and checking if the file
system recovers to a correct state after each crash.
Ace automatically generates all the workloads to be run
on the target file system. We build CrashMonkey and Ace
based on a new approach to test file-system crash
consistency: bounded black-box crash testing (B$^3$).
B$^3$ tests the file system in a black-box manner using
workloads of file-system operations. Since the space of
possible workloads is infinite, B$^3$ bounds this space
based on parameters such as the number of file-system
operations or which operations to include, and
exhaustively generates workloads within this bounded
space. B$^3$ builds upon insights derived from our
study of crash-consistency bugs reported in Linux file
systems in the last 5 years. We observed that most
reported bugs can be reproduced using small workloads
of three or fewer file-system operations on a newly
created file system, and that all reported bugs result
from crashes after fsync()-related system calls.
CrashMonkey and Ace are able to find 24 out of the 26
crash-consistency bugs reported in the last 5 years.
Our tools also revealed 10 new crash-consistency bugs
in widely used, mature Linux file systems, 7 of which
existed in the kernel since 2014. Additionally, our
tools found a crash-consistency bug in a verified file
system, FSCQ. The new bugs result in severe
consequences like broken rename atomicity, loss of
persisted files and directories, and data loss.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Vangoor:2019:PRU,
author = "Bharath Kumar Reddy Vangoor and Prafful Agarwal and
Manu Mathew and Arun Ramachandran and Swaminathan
Sivaraman and Vasily Tarasov and Erez Zadok",
title = "Performance and Resource Utilization of {FUSE}
User-Space File Systems",
journal = j-TOS,
volume = "15",
number = "2",
pages = "15:1--15:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3310148",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3310148",
abstract = "Traditionally, file systems were implemented as part
of operating systems kernels, which provide a limited
set of tools and facilities to a programmer. As the
complexity of file systems grew, many new file systems
began being developed in user space. Low performance is
considered the main disadvantage of user-space file
systems but the extent of this problem has never been
explored systematically. As a result, the topic of
user-space file systems remains rather controversial:
while some consider user-space file systems a ``toy''
not to be used in production, others develop
full-fledged production file systems in user space. In
this article, we analyze the design and implementation
of a well-known user-space file system framework, FUSE,
for Linux. We characterize its performance and resource
utilization for a wide range of workloads. We present
FUSE performance and also resource utilization with
various mount and configuration options, using 45
different workloads that were generated using Filebench
on two different hardware configurations. We
instrumented FUSE to extract useful statistics and
traces, which helped us analyze its performance
bottlenecks and present our analysis results. Our
experiments indicate that depending on the workload and
hardware used, performance degradation (throughput)
caused by FUSE can be completely imperceptible or as
high as $-$83\%, even when optimized; and latencies of
FUSE file system operations can be increased from none
to 4$ \times $ when compared to Ext4. On the resource
utilization side, FUSE can increase relative CPU
utilization by up to 31\% and underutilize disk
bandwidth by as much as $-$80\% compared to Ext4, though
for many data-intensive workloads the impact was
statistically indistinguishable. Our conclusion is that
user-space file systems can indeed be used in
production (non-``toy'') settings, but their
applicability depends on the expected workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wen:2019:CTS,
author = "Weidong Wen and Yang Li and Wenhai Li and Lingfeng
Deng and Yanxiang He",
title = "{CORES}: Towards Scan-Optimized Columnar Storage for
Nested Records",
journal = j-TOS,
volume = "15",
number = "3",
pages = "16:1--16:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3321704",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3321704",
abstract = "The relatively high cost of record deserialization is
increasingly becoming the bottleneck of column-based
storage systems in tree-structured applications [58].
Due to record transformation in the storage layer,
unnecessary processing costs derived from fields and
rows irrelevant to queries may be very heavy in nested
schemas, significantly wasting the computational
resources in large-scale analytical workloads. This
leads to the question of how to reduce both the
deserialization and IO costs of queries with highly
selective filters following arbitrary paths in a nested
schema. We present CORES (Column-Oriented Regeneration
Embedding Scheme) to push highly selective filters down
into column-based storage engines, where each filter
consists of several filtering conditions on a field. By
applying highly selective filters in the storage layer,
we demonstrate that both the deserialization and IO
costs could be significantly reduced. We show how to
introduce fine-grained composition on filtering
results. We generalize this technique by two pair-wise
operations, rollup and drilldown, such that a series of
conjunctive filters can effectively deliver their
payloads in nested schema. The proposed methods are
implemented on an open-source platform. For practical
purposes, we highlight how to build a column storage
engine and how to drive a query efficiently based on a
cost model. We apply this design to the nested
relational model especially when hierarchical entities
are frequently required by ad hoc queries. The
experiments, including a real workload and the modified
TPCH benchmark, demonstrate that CORES improves the
performance by 0.7$ \times $--26.9$ \times $ compared
to state-of-the-art platforms in scan-intensive
workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Lu:2019:MSO,
author = "Youyou Lu and Jiwu Shu and Jiacheng Zhang",
title = "Mitigating Synchronous {I/O} Overhead in File Systems
on Open-Channel {SSDs}",
journal = j-TOS,
volume = "15",
number = "3",
pages = "17:1--17:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3319369",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3319369",
abstract = "Synchronous I/O has long been a design challenge in
file systems. Although open-channel solid state drives
(SSDs) provide better performance and endurance to file
systems, they still suffer from synchronous I/Os due to
the amplified writes and worse hot/cold data grouping.
The reason lies in the controversy design choices
between flash write and read/erase operations. While
fine-grained logging improves performance and endurance
in writes, it hurts indexing and data grouping
efficiency in read and erase operations. In this
article, we propose a flash-friendly data layout by
introducing a built-in persistent staging layer to
provide balanced read, write, and garbage collection
performance. Based on this, we design a new flash file
system (FS) named StageFS, which decouples the content
and structure updates. Content updates are logically
logged to the staging layer in a persistence-efficient
way, which achieves better write performance and lower
write amplification. The updated contents are
reorganized into the normal data area for structure
updates, with improved hot/cold grouping and in a
page-level indexing way, which is more friendly to read
and garbage collection operations. Evaluation results
show that, compared to recent flash-friendly file
system (F2FS), StageFS effectively improves performance
by up to 211.4\% and achieves low garbage collection
overhead for workloads with frequent synchronization.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "17",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2019:ESS,
author = "Yin Li and Xubin Chen and Ning Zheng and Jingpeng Hao
and Tong Zhang",
title = "An Exploratory Study on Software-Defined Data Center
Hard Disk Drives",
journal = j-TOS,
volume = "15",
number = "3",
pages = "18:1--18:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3319405",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3319405",
abstract = "This article presents a design framework aiming to
reduce mass data storage cost in data centers. Its
underlying principle is simple: Assume one may
noticeably reduce the HDD manufacturing cost by
significantly (i.e., at least several orders of
magnitude) relaxing raw HDD reliability, which ensures
the eventual data storage integrity via low-cost
system-level redundancy. This is called system-assisted
HDD bit cost reduction. To better utilize both capacity
and random IOPS of HDDs, it is desirable to mix data
with complementary requirements on capacity and random
IOPS in each HDD. Nevertheless, different capacity and
random IOPS requirements may demand different raw HDD
reliability vs. bit cost trade-offs and hence different
forms of system-assisted bit cost reduction. This
article presents a software-centric design framework to
realize data-adaptive system-assisted bit cost
reduction for data center HDDs. Implementation is
solely handled by the filesystem and demands only minor
change of the error correction coding (ECC) module
inside HDDs. Hence, it is completely transparent to all
the other components in the software stack (e.g.,
applications, OS kernel, and drivers) and keeps
fundamental HDD design practice (e.g., firmware, media,
head, and servo) intact. We carried out analysis and
experiments to evaluate its implementation feasibility
and effectiveness. We integrated the design techniques
into ext4 to further quantitatively measure its impact
on system speed performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "18",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Xie:2019:ZZB,
author = "Xuchao Xie and Liquan Xiao and David H. C. Du",
title = "{ZoneTier}: a Zone-based Storage Tiering and Caching
Co-design to Integrate {SSDs} with {SMR} Drives",
journal = j-TOS,
volume = "15",
number = "3",
pages = "19:1--19:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3335548",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3335548",
abstract = "Integrating solid-state drives (SSDs) and host-aware
shingled magnetic recording (HA-SMR) drives can
potentially build a cost-effective high-performance
storage system. However, existing SSD tiering and
caching designs in such a hybrid system are not fully
matched with the intrinsic properties of HA-SMR drives
due to their lacking consideration of how to handle
non-sequential writes (NSWs). We propose ZoneTier, a
zone-based storage tiering and caching co-design, to
effectively control all the NSWs by leveraging the
host-aware property of HA-SMR drives. ZoneTier exploits
real-time data layout of SMR zones to optimize zone
placement, reshapes NSWs generated from zone demotions
to SMR preferred sequential writes, and transforms the
inevitable NSWs to cleaning-friendly write traffics for
SMR zones. ZoneTier can be easily extended to match
host-managed SMR drives using proactive cleaning
policy. We implemented a prototype of ZoneTier with
user space data management algorithms and real SSD and
HA-SMR drives, which are manipulated by the functions
provided by libzbc and libaio. Our experiments show
that ZoneTier can reduce zone relocation overhead by
29.41\% on average, shorten performance recovery time
of HA-SMR drives from cleaning by up to 33.37\%, and
improve performance by up to 32.31\% than existing
hybrid storage designs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "19",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Li:2019:EEU,
author = "Yongkun Li and Helen H. W. Chan and Patrick P. C. Lee
and Yinlong Xu",
title = "Enabling Efficient Updates in {KV} Storage via
Hashing: Design and Performance Evaluation",
journal = j-TOS,
volume = "15",
number = "3",
pages = "20:1--20:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3340287",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3340287",
abstract = "Persistent key-value (KV) stores mostly build on the
Log-Structured Merge (LSM) tree for high write
performance, yet the LSM-tree suffers from the
inherently high I/O amplification. KV separation
mitigates I/O amplification by storing only keys in the
LSM-tree and values in separate storage. However, the
current KV separation design remains inefficient under
update-intensive workloads due to its high garbage
collection (GC) overhead in value storage. We propose
HashKV, which aims for high update performance atop KV
separation under update-intensive workloads. HashKV
uses hash-based data grouping, which deterministically
maps values to storage space to make both updates and
GC efficient. We further relax the restriction of such
deterministic mappings via simple but useful design
extensions. We extensively evaluate various design
aspects of HashKV. We show that HashKV achieves 4.6$
\times $ update throughput and 53.4\% less write
traffic compared to the current KV separation design.
In addition, we demonstrate that we can integrate the
design of HashKV with state-of-the-art KV stores and
improve their respective performance.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "20",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Wang:2019:AAD,
author = "Ji Wang and Weidong Bao and Lei Zheng and Xiaomin Zhu
and Philip S. Yu",
title = "An Attention-augmented Deep Architecture for Hard
Drive Status Monitoring in Large-scale Storage
Systems",
journal = j-TOS,
volume = "15",
number = "3",
pages = "21:1--21:??",
month = aug,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3340290",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Sep 21 07:58:50 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3340290",
abstract = "Data centers equipped with large-scale storage systems
are critical infrastructures in the era of big data.
The enormous amount of hard drives in storage systems
magnify the failure probability, which may cause
tremendous loss for both data service users and
providers. Despite a set of reactive fault-tolerant
measures such as RAID, it is still a tough issue to
enhance the reliability of large-scale storage systems.
Proactive prediction is an effective method to avoid
possible hard-drive failures in advance. A series of
models based on the SMART statistics have been proposed
to predict impending hard-drive failures. Nonetheless,
there remain some serious yet unsolved challenges like
the lack of explainability of prediction results. To
address these issues, we carefully analyze a dataset
collected from a real-world large-scale storage system
and then design an attention-augmented deep
architecture for hard-drive health status assessment
and failure prediction. The deep architecture, composed
of a feature integration layer, a temporal dependency
extraction layer, an attention layer, and a
classification layer, can not only monitor the status of
hard drives but also assist in failure cause diagnoses.
The experiments based on real-world datasets show that
the proposed deep architecture is able to assess the
hard-drive status and predict the impending failures
accurately. In addition, the experimental results
demonstrate that the attention-augmented deep
architecture can reveal the degradation progression of
hard drives automatically and assist administrators in
tracing the cause of hard drive failures.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "21",
fjournal = "ACM Transactions on Storage",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J960",
}
@Article{Anonymous:2020:EM,
author = "Anonymous",
title = "{EIC} Message",
journal = j-TOS,
volume = "15",
number = "4",
pages = "1--2",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372345",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372345",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Merchant:2020:ISI,
author = "Arif Merchant and Hakim Weatherspoon",
title = "Introduction to the Special Issue on {USENIX FAST
2019}",
journal = j-TOS,
volume = "15",
number = "4",
pages = "22e:1--22e:1",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372347",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372347",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "22e",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Sivathanu:2020:ICF,
author = "Muthian Sivathanu and Midhul Vuppalapati and Bhargav
S. Gulavani and Kaushik Rajan and Jyoti Leeka and
Jayashree Mohan and Piyus Kedia",
title = "{INSTalytics}: Cluster Filesystem Co-design for
Big-data Analytics",
journal = j-TOS,
volume = "15",
number = "4",
pages = "23:1--23:30",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369738",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369738",
abstract = "We present the design, implementation, and evaluation
of INSTalytics, a co-designed stack of a cluster file
system and the compute layer, for efficient big-data
analytics in large-scale data centers. INSTalytics
amplifies the well-known benefits of data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "23",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Harnik:2020:SVC,
author = "Danny Harnik and Moshik Hershcovitch and Yosef Shatsky
and Amir Epstein and Ronen Kat",
title = "Sketching Volume Capacities in Deduplicated Storage",
journal = j-TOS,
volume = "15",
number = "4",
pages = "24:1--24:23",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369737",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369737",
abstract = "The adoption of deduplication in storage systems has
introduced significant new challenges for storage
management. Specifically, the physical capacities
associated with volumes are no longer readily
available. In this work, we introduce a new approach
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "24",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kesavan:2020:CFE,
author = "Ram Kesavan and Matthew Curtis-Maury and Vinay Devadas
and Kesari Mishra",
title = "Countering Fragmentation in an Enterprise Storage
System",
journal = j-TOS,
volume = "15",
number = "4",
pages = "25:1--25:35",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3366173",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3366173",
abstract = "As a file system ages, it can experience multiple
forms of fragmentation. Fragmentation of the free space
in the file system can lower write performance and
subsequent read performance. Client operations as well
as internal operations, such as \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "25",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Xie:2020:COB,
author = "Bing Xie and Sarp Oral and Christopher Zimmer and Jong
Youl Choi and David Dillow and Scott Klasky and Jay
Lofstead and Norbert Podhorszki and Jeffrey S. Chase",
title = "Characterizing Output Bottlenecks of a Production
Supercomputer: Analysis and Implications",
journal = j-TOS,
volume = "15",
number = "4",
pages = "26:1--26:39",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3335205",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/super.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3335205",
abstract = "This article studies the I/O write behaviors of the
Titan supercomputer and its Lustre parallel file stores
under production load. The results can inform the
design, deployment, and configuration of file systems
along with the design of I/O software in \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "26",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Zhang:2020:DDD,
author = "Guangyan Zhang and Zhufan Wang and Xiaosong Ma and
Songlin Yang and Zican Huang and Weimin Zheng",
title = "Determining Data Distribution for Large Disk
Enclosures with {$3$-D} Data Templates",
journal = j-TOS,
volume = "15",
number = "4",
pages = "27:1--27:38",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3342858",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3342858",
abstract = "Conventional RAID solutions with fixed layouts
partition large disk enclosures so that each RAID group
uses its own disks exclusively. This achieves good
performance isolation across underlying disk groups, at
the cost of disk under-utilization and slow \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "27",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kang:2020:LVC,
author = "Dong Hyun Kang and Sang-Won Lee and Young Ik Eom",
title = "{LDJ}: Version Consistency Is Almost Free on
Commercial Storage Devices",
journal = j-TOS,
volume = "15",
number = "4",
pages = "28:1--28:20",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3365918",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3365918",
abstract = "In this article, we propose a simple but practical and
efficient optimization scheme for journaling in ext4,
called lightweight data journaling (LDJ). By
compressing journaled data prior to writing, LDJ can
perform comparable to or even faster than the default
ordered journaling (OJ) mode in ext4 on top of both
HDDs and flash storage devices, while still
guaranteeing the version consistency of the data
journaling (DJ) mode. This surprising result can be
explained with three main reasons. First, on modern
storage devices, the sequential write pattern
dominating in DJ mode is more and more high-performant
than the random one in OJ mode. Second, the compression
significantly reduces the amount of journal writes,
which will in turn make the write completion faster and
prolong the lifespan of storage devices. Third, the
compression also enables the atomicity of each journal
write without issuing an intervening FLUSH command
between journal data blocks and commit block, thus
halving the number of costly FLUSH calls in LDJ. We
have prototyped our LDJ by slightly modifying the
existing ext4 with jbd2 for journaling and also e2fsck
for recovery; less than 300 lines of source code were
changed. Also, we carried out a comprehensive
evaluation using four standard benchmarks and three
real applications. Our evaluation results clearly show
that LDJ outperforms the OJ mode by up to $ 9.6 \times
$ on the real applications.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "28",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kumar:2020:GDS,
author = "Pradeep Kumar and H. Howie Huang",
title = "{GraphOne}: a Data Store for Real-time Analytics on
Evolving Graphs",
journal = j-TOS,
volume = "15",
number = "4",
pages = "29:1--29:40",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3364180",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Thu Feb 6 08:15:19 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3364180",
abstract = "There is a growing need to perform a diverse set of
real-time analytics (batch and stream analytics) on
evolving graphs to deliver the values of big data to
users. The key requirement from such applications is to
have a data store to support their \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "29",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Malkhi:2020:ISS,
author = "Dahlia Malkhi and Dan Tsafrir",
title = "Introduction to the Special Section on {USENIX ATC
2019}",
journal = j-TOS,
volume = "16",
number = "1",
pages = "1:1--1:1",
month = apr,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3383194",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Apr 8 11:43:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3383194",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Jaffer:2020:RMF,
author = "Shehbaz Jaffer and Stathis Maneas and Andy Hwang and
Bianca Schroeder",
title = "The Reliability of Modern File Systems in the face of
{SSD} Errors",
journal = j-TOS,
volume = "16",
number = "1",
pages = "2:1--2:28",
month = apr,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3375553",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Apr 8 11:43:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3375553",
abstract = "As solid state drives (SSDs) are increasingly
replacing hard disk drives, the reliability of storage
systems depends on the failure modes of SSDs and the
ability of the file system layered on top to handle
these failure modes. While the classical paper on IRON
File Systems provides a thorough study of the failure
policies of three file systems common at the time, we
argue that 13 years later it is time to revisit file
system reliability with SSDs and their reliability
characteristics in mind, based on modern file systems
that incorporate journaling, copy-on-write, and
log-structured approaches and are optimized for flash.
This article presents a detailed study, spanning ext4,
Btrfs, and F2FS, and covering a number of different SSD
error modes. We develop our own fault injection
framework and explore over 1,000 error cases. Our
results indicate that 16\% of these cases result in a
file system that cannot be mounted or even repaired by
its system checker. We also identify the key file
system metadata structures that can cause such
failures, and, finally, we recommend some design
guidelines for file systems that are deployed on top of
SSDs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kuszmaul:2020:ELF,
author = "Bradley C. Kuszmaul and Matteo Frigo and Justin
Mazzola Paluska and Alexander (Sasha) Sandler",
title = "Everyone Loves File: {Oracle File Storage Service}",
journal = j-TOS,
volume = "16",
number = "1",
pages = "3:1--3:29",
month = apr,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3377877",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Apr 8 11:43:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3377877",
abstract = "Oracle File Storage Service (FSS) is an elastic
filesystem provided as a managed NFS service. A
pipelined Paxos implementation underpins a scalable
block store that provides linearizable multipage
limited-size transactions. Above the block store, a
scalable B-tree holds filesystem metadata and provides
linearizable multikey limited-size transactions.
Self-validating B-tree nodes and housekeeping
operations performed as separate transactions allow
each key in a B-tree transaction to require only one
page in the underlying block transaction. The
filesystem provides snapshots by using versioned
key-value pairs. The system is programmed using a
nonblocking lock-free programming style. Presentation
servers maintain no persistent local state making them
scalable and easy to failover. A non-scalable
Paxos-replicated hash table holds configuration
information required to bootstrap the system. An
additional B-tree provides conversational multi-key
minitransactions for control-plane information. The
system throughput can be predicted by comparing an
estimate of the network bandwidth needed for
replication to the network bandwidth provided by the
hardware. Latency on an unloaded system is about 4
times higher than a Linux NFS server backed by NVMe,
reflecting the cost of replication. FSS has been in
production since January 2018 and holds tens of
thousands of customer file systems comprising many
petabytes of data.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Li:2020:ILE,
author = "Jingwei Li and Patrick P. C. Lee and Chufeng Tan and
Chuan Qin and Xiaosong Zhang",
title = "Information Leakage in Encrypted Deduplication via
Frequency Analysis: Attacks and Defenses",
journal = j-TOS,
volume = "16",
number = "1",
pages = "4:1--4:30",
month = apr,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3365840",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Apr 8 11:43:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3365840",
abstract = "Encrypted deduplication combines encryption and
deduplication to simultaneously achieve both data
security and storage efficiency. State-of-the-art
encrypted deduplication systems mainly build on
deterministic encryption to preserve deduplication
effectiveness. However, such deterministic encryption
reveals the underlying frequency distribution of the
original plaintext chunks. This allows an adversary to
launch frequency analysis against the ciphertext chunks
and infer the content of the original plaintext chunks.
In this article, we study how frequency analysis
affects information leakage in encrypted deduplication,
from both attack and defense perspectives.
Specifically, we target backup workloads and propose a
new inference attack that exploits chunk locality to
increase the coverage of inferred chunks. We further
combine the new inference attack with the knowledge of
chunk sizes and show its attack effectiveness against
variable-size chunks. We conduct trace-driven
evaluation on both real-world and synthetic datasets
and show that our proposed attacks infer a significant
fraction of plaintext chunks under backup workloads. To
defend against frequency analysis, we present two
defense approaches, namely MinHash encryption and
scrambling. Our trace-driven evaluation shows that our
combined MinHash encryption and scrambling scheme
effectively mitigates the severity of the inference
attacks, while maintaining high storage efficiency and
incurring limited metadata access overhead.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Zhang:2020:CFF,
author = "Shuanglong Zhang and Robert Roy and Leah Rumancik and
An-I Andy Wang",
title = "The Composite-File File System: Decoupling One-to-One
Mapping of Files and Metadata for Better Performance",
journal = j-TOS,
volume = "16",
number = "1",
pages = "5:1--5:18",
month = apr,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3366684",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Apr 8 11:43:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3366684",
abstract = "The design and implementation of traditional file
systems typically use the one-to-one mapping of logical
files to their physical metadata representations. File
system optimizations generally follow this rigid
mapping and miss opportunities for an entire class of
optimizations.\par
We designed, implemented, and evaluated a
composite-file file system, which allows many-to-one
mappings of files to metadata. Through exploring
different mapping strategies, our empirical evaluation
shows up to a 27\% performance improvement under web
server and software development workloads, for both
disks and SSDs. This result demonstrates that our
approach of relaxing file-to-metadata mapping is
promising.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Zhang:2020:PEE,
author = "Yiming Zhang and Huiba Li and Shengyun Liu and Jiawei
Xu and Guangtao Xue",
title = "{PBS}: an Efficient Erasure-Coded Block Storage System
Based on Speculative Partial Writes",
journal = j-TOS,
volume = "16",
number = "1",
pages = "6:1--6:25",
month = apr,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3365839",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Apr 8 11:43:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3365839",
abstract = "Block storage provides virtual disks that can be
mounted by virtual machines (VMs). Although erasure
coding (EC) has been widely used in many cloud storage
systems for its high efficiency and durability, current
EC schemes cannot provide high-performance block
storage for the cloud. This is because they introduce
significant overhead to small write operations (which
perform partial write to an entire EC group), whereas
cloud-oblivious applications running on VMs are often
small-write-intensive. We identify the root cause for
the poor performance of partial writes in
state-of-the-art EC schemes: for each partial write,
they have to perform a time-consuming write-after-read
operation that reads the current value of the data and
then computes and writes the parity delta, which will
be used to patch the parity in journal replay.\par
In this article, we present a speculative partial write
scheme (called PARIX) that supports fast small writes
in erasure-coded storage systems. We transform the
original formula of parity calculation to use the data
deltas (between the current/original data values),
instead of the parity deltas, to calculate the parities
in journal replay. For each partial write, this allows
PARIX to speculatively log only the new value of the
data without reading its original value. For a series
of $n$ partial writes to the same data, PARIX performs
pure write (instead of write-after-read) for the last $
n - 1$ ones while only introducing a small penalty of
an extra network round-trip time to the first one.
Based on PARIX, we design and implement PARIX Block
Storage (PBS), an efficient block storage system that
provides high-performance virtual disk service for VMs
running cloud-oblivious applications. PBS not only
supports fast partial writes but also realizes
efficient full writes, background journal replay, and
fast failure recovery with strong consistency
guarantees. Both microbenchmarks and trace-driven
evaluation show that PBS provides efficient block
storage and outperforms state-of-the-art EC-based
systems by orders of magnitude.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Zhou:2020:FEC,
author = "Tianli Zhou and Chao Tian",
title = "Fast Erasure Coding for Data Storage: a Comprehensive
Study of the Acceleration Techniques",
journal = j-TOS,
volume = "16",
number = "1",
pages = "7:1--7:24",
month = apr,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3375554",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Apr 8 11:43:49 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3375554",
abstract = "Various techniques have been proposed in the
literature to improve erasure code computation
efficiency, including optimizing bitmatrix design and
computation schedule, common XOR (exclusive-OR)
operation reduction, caching management techniques, and
vectorization techniques. These techniques were largely
proposed individually, and, in this work, we seek to
use them jointly. To accomplish this task, these
techniques need to be thoroughly evaluated individually
and their relation better understood. Building on
extensive testing, we develop methods to systematically
optimize the computation chain together with the
underlying bitmatrix. This led to a simple design
approach of optimizing the bitmatrix by minimizing a
weighted computation cost function, and also a
straightforward coding procedure: follow a computation
schedule produced from the optimized bitmatrix to apply
XOR-level vectorization. This procedure provides better
performances than most existing techniques (e.g., those
used in ISA-L and Jerasure libraries), and sometimes
can even compete against well-known but less general
codes such as EVENODD, RDP, and STAR codes. One
particularly important observation is that vectorizing
the XOR operations is a better choice than directly
vectorizing finite field operations, not only because
of the flexibility in choosing finite field size and
the better encoding throughput, but also its minimal
migration efforts onto newer CPUs.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Arpaci-Dusseau:2020:ISS,
author = "Remzi H. Arpaci-Dusseau and Yuanyuan (YY) Zhou",
title = "Introduction to the Special Section on {SOSP 2019}",
journal = j-TOS,
volume = "16",
number = "2",
pages = "8:1--8:1",
month = jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3395778",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Jun 14 08:20:04 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3395778",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Aghayev:2020:CCS,
author = "Abutalib Aghayev and Sage Weil and Michael Kuchnik and
Mark Nelson and Gregory R. Ganger and George
Amvrosiadis",
title = "The Case for Custom Storage Backends in Distributed
Storage Systems",
journal = j-TOS,
volume = "16",
number = "2",
pages = "9:1--9:31",
month = jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3386362",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Jun 14 08:20:04 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3386362",
abstract = "For a decade, the Ceph distributed file system
followed the conventional wisdom of building its
storage backend on top of local file systems. This is a
preferred choice for most distributed file systems
today, because it allows them to benefit from the
convenience and maturity of battle-tested code. Ceph's
experience, however, shows that this comes at a high
price. First, developing a zero-overhead transaction
mechanism is challenging. Second, metadata performance
at the local level can significantly affect performance
at the distributed level. Third, supporting emerging
storage hardware is painstakingly slow.\par
Ceph addressed these issues with BlueStore, a new
backend designed to run directly on raw storage
devices. In only two years since its inception,
BlueStore outperformed previous established backends
and is adopted by 70\% of users in production. By
running in user space and fully controlling the I/O
stack, it has enabled space-efficient metadata and data
checksums, fast overwrites of erasure-coded data,
inline compression, decreased performance variability,
and avoided a series of performance pitfalls of local
file systems. Finally, it makes the adoption of
backward-incompatible storage hardware possible, an
important trait in a changing storage landscape that is
learning to embrace hardware diversity.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kim:2020:FBF,
author = "Seulbae Kim and Meng Xu and Sanidhya Kashyap and
Jungyeon Yoon and Wen Xu and Taesoo Kim",
title = "Finding Bugs in File Systems with an Extensible
Fuzzing Framework",
journal = j-TOS,
volume = "16",
number = "2",
pages = "10:1--10:35",
month = jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3391202",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Jun 14 08:20:04 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3391202",
abstract = "File systems are too large to be bug free. Although
handwritten test suites have been widely used to stress
file systems, they can hardly keep up with the rapid
increase in file system size and complexity, leading to
new bugs being introduced. These bugs come in various
flavors: buffer overflows to complicated semantic bugs.
Although bug-specific checkers exist, they generally
lack a way to explore file system states thoroughly.
More importantly, no turnkey solution exists that
unifies the checking effort of various aspects of a
file system under one umbrella.\par
In this article, to highlight the potential of applying
fuzzing to find any type of file system bugs in a
generic way, we propose Hydra, an extensible fuzzing
framework. Hydra provides building blocks for file
system fuzzing, including input mutators, feedback
engines, test executors, and bug post-processors. As a
result, developers only need to focus on building the
core logic for finding bugs of their interests. We
showcase the effectiveness of Hydra with four checkers
that hunt crash inconsistency, POSIX violations, logic
assertion failures, and memory errors. So far, Hydra
has discovered 157 new bugs in Linux file systems,
including three in verified file systems (FSCQ and
Yxv6).",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kolosov:2020:FTL,
author = "Oleg Kolosov and Gala Yadgar and Matan Liram and
Itzhak Tamo and Alexander Barg",
title = "On Fault Tolerance, Locality, and Optimality in
Locally Repairable Codes",
journal = j-TOS,
volume = "16",
number = "2",
pages = "11:1--11:32",
month = jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3381832",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Jun 14 08:20:04 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3381832",
abstract = "Erasure codes in large-scale storage systems allow
recovery of data from a failed node. A recently
developed class of codes, locally repairable codes
(LRCs), offers tradeoffs between storage overhead and
repair cost. LRCs facilitate efficient recovery
scenarios by adding parity blocks to the system.
However, these additional blocks may eventually
increase the number of blocks that must be
reconstructed. Existing LRCs differ in their use of the
parity blocks, in their locality semantics, and in
their parameter space. Thus, existing theoretical
models cannot directly compare different LRCs to
determine which code offers the best recovery
performance, and at what cost.\par
We perform the first systematic comparison of existing
LRC approaches. We analyze Xorbas, Azure's LRCs, and
Optimal-LRCs in light of two new metrics: average
degraded read cost and normalized repair cost. We show
the tradeoff between these costs and the code's fault
tolerance, and that different approaches offer
different choices in this tradeoff. Our experimental
evaluation on a Ceph cluster further demonstrates the
different effects of realistic system bottlenecks on
the benefit from each LRC approach. Despite these
differences, the normalized repair cost metric can
reliably identify the LRC approach that would achieve
the lowest repair cost in each setup.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Yang:2020:SEF,
author = "Fan Yang and Youmin Chen and Haiyu Mao and Youyou Lu
and Jiwu Shu",
title = "{ShieldNVM}: an Efficient and Fast Recoverable System
for Secure Non-Volatile Memory",
journal = j-TOS,
volume = "16",
number = "2",
pages = "12:1--12:31",
month = jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3381835",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Jun 14 08:20:04 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3381835",
abstract = "Data encryption and authentication are essential for
secure non-volatile memory (NVM). However, the
introduced security metadata needs to be atomically
written back to NVM along with data, so as to provide
crash consistency, which unfortunately incurs high
overhead. To support fine-grained data protection and
fast recovery for a secure NVM system without
compromising the performance, we propose ShieldNVM. It
first proposes an epoch-based mechanism to aggressively
cache the security metadata in the metadata cache while
retaining the consistency of them in NVM. Deferred
spreading is also introduced to reduce the calculating
overhead for data authentication. Leveraging the
ability of data hash message authentication codes, we
can always recover the consistent but old security
metadata to its newest version. By recording a limited
number of dirty addresses of the security metadata,
ShieldNVM achieves fast recovering the secure NVM
system after crashes. Compared to Osiris, a
state-of-the-art secure NVM, ShieldNVM reduces system
runtime by 39.1\% and hash message authentication code
computation overhead by 80.5\% on average over NVM
workloads. When system crashes happen, ShieldNVM's
recovery time is orders of magnitude faster than
Osiris. In addition, ShieldNVM also recovers faster
than AGIT, which is the Osiris-based state-of-the-art
mechanism addressing the recovery time of the secure
NVM system. Once the recovery process fails, instead of
dropping all data due to malicious attacks, ShieldNVM
is able to detect and locate the area of the tampered
data with the help of the tracked addresses.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Matsuzawa:2020:PQF,
author = "Keiichi Matsuzawa and Mitsuo Hayasaka and Takahiro
Shinagawa",
title = "Practical Quick File Server Migration",
journal = j-TOS,
volume = "16",
number = "2",
pages = "13:1--13:30",
month = jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3377322",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Jun 14 08:20:04 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3377322",
abstract = "Regular file server upgrades are indispensable to
improve performance, robustness, and power consumption.
In upgrading file servers, it is crucial to quickly
migrate file-sharing services between heterogeneous
servers with little downtime while minimizing
performance interference. We present a practical quick
file server migration scheme based on the postcopy
approach that defers file copy until after switching
servers. This scheme can (1) reduce downtime with
on-demand file migration, (2) avoid performance
interference using background migration, and (3)
support heterogeneous servers with stub-based file
management. We discuss several practical issues, such
as intermittent crawling and traversal strategy, and
present the solutions in our scheme. We also address
several protocol-specific issues to achieve a smooth
migration. This scheme is good enough to be adopted in
production systems, as it has been demonstrated for
several years in real operational environments. The
performance evaluation demonstrates that the downtime
is less than 3 seconds, and the first file access after
switching servers does not cause a timeout in the
default timeout settings; it takes less than 10 seconds
in most cases and up to 84.55 seconds even in a large
directory tree with a depth of 16 and a width of 1,000.
Although the total migration time is approximately 3
times longer than the traditional precopy approach that
copies all files in advance, our scheme allows the
clients to keep accessing files with acceptable
overhead. We also show that appropriate selection of
traversal strategy reduces tail latency by 88\%, and
the overhead after the migration is negligible.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Jia:2020:SED,
author = "Yichen Jia and Zili Shao and Feng Chen",
title = "{SlimCache}: an Efficient Data Compression Scheme for
Flash-based Key-value Caching",
journal = j-TOS,
volume = "16",
number = "2",
pages = "14:1--14:34",
month = jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3383124",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sun Jun 14 08:20:04 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3383124",
abstract = "Flash-based key-value caching is becoming popular in
data centers for providing high-speed key-value
services. These systems adopt slab-based space
management on flash and provide a low-cost solution for
key-value caching. However, optimizing cache efficiency
for flash-based key-value cache systems is highly
challenging, due to the huge number of key-value items
and the unique technical constraints of flash devices.
In this article, we present a dynamic on-line
compression scheme, called SlimCache, to improve the
cache hit ratio by virtually expanding the usable cache
space through data compression. We have investigated
the effect of compression granularity to achieve a
balance between compression ratio and speed, and we
leveraged the unique workload characteristics in
key-value systems to efficiently identify and separate
hot and cold data. To dynamically adapt to workload
changes during runtime, we have designed an adaptive
hot/cold area partitioning method based on a cost
model. To avoid unnecessary compression, SlimCache also
estimates data compressibility to determine whether the
data are suitable for compression or not. We have
implemented a prototype based on Twitter's Fatcache.
Our experimental results show that SlimCache can
accommodate more key-value items in flash by up to
223.4\%, effectively increasing throughput and reducing
average latency by up to 380.1\% and 80.7\%,
respectively.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kwak:2020:COR,
author = "Jaewook Kwak and Sangjin Lee and Kibin Park and Jinwoo
Jeong and Yong Ho Song",
title = "{Cosmos+ OpenSSD}: Rapid Prototype for Flash Storage
Systems",
journal = j-TOS,
volume = "16",
number = "3",
pages = "15:1--15:35",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3385073",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 15 07:00:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3385073",
abstract = "As semiconductor technology has advanced, many storage
systems have begun to use non-volatile memories as
storage media. The organization and architecture of
storage controllers have become more complex to meet
various design requirements in terms of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Sun:2020:SEF,
author = "Kuei Sun and Daniel Fryer and Russell Wang and Sagar
Patel and Joseph Chu and Matthew Lakier and Angela
Demke Brown and Ashvin Goel",
title = "{Spiffy}: Enabling File-System Aware Storage
Applications",
journal = j-TOS,
volume = "16",
number = "3",
pages = "16:1--16:39",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3386368",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 15 07:00:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3386368",
abstract = "Many file-system applications such as defragmentation
tools, file-system checkers, or data recovery tools,
operate at the storage layer. Today, developers of
these file-system aware storage applications require
detailed knowledge of the file-system \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Cha:2020:BTB,
author = "Hokeun Cha and Moohyeon Nam and Kibeom Jin and Jiwon
Seo and Beomseok Nam",
title = "{B$^3$-Tree}: Byte-Addressable Binary {B}-Tree for
Persistent Memory",
journal = j-TOS,
volume = "16",
number = "3",
pages = "17:1--17:27",
month = aug,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3394025",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Sat Aug 15 07:00:37 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3394025",
abstract = "In this work, we propose B$^3$-tree, a hybrid index
for persistent memory that leverages the
byte-addressability of the in-memory index and the page
locality of B-trees. As in the byte-addressable
in-memory index, B$^3$-tree is updated by 8-byte store
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "17",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
%%% TOS 16(3), Aug 2020, art. 18 -- one-time-access exclusion in cloud caches.
@Article{Wang:2020:CWY,
  author          = {Hua Wang and Jiawei Zhang and Ping Huang and Xinbo Yi and
                     Bin Cheng and Ke Zhou},
  title           = {Cache What You Need to Cache: Reducing Write Traffic in
                     Cloud Cache via {``One-Time-Access-Exclusion''} Policy},
  journal         = j-TOS,
  volume          = {16},
  number          = {3},
  pages           = {18:1--18:24},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3397766},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Sat Aug 15 07:00:37 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3397766},
  abstract        = {The SSD has been playing a significantly important role
                     in caching systems due to its high performance-to-cost
                     ratio. Since the cache space is typically much smaller
                     than that of the backend storage by one order of
                     magnitude or even more, write density \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {18},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 16(3), Aug 2020, art. 19 -- batch-file operations.
@Article{Yang:2020:BFO,
  author          = {Yang Yang and Qiang Cao and Jie Yao and Hong Jiang and Li
                     Yang},
  title           = {Batch-file Operations to Optimize Massive Files
                     Accessing: Analysis, Design, and Application},
  journal         = j-TOS,
  volume          = {16},
  number          = {3},
  pages           = {19:1--19:25},
  month           = aug,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3394286},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Sat Aug 15 07:00:37 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3394286},
  abstract        = {Existing local file systems, designed to support a
                     typical single-file access mode only, can lead to poor
                     performance when accessing a batch of files, especially
                     small files. This single-file mode essentially
                     serializes accesses to batched files one by \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {19},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 16(4), Nov 2020, art. 20 -- special-section introduction (no abstract).
@Article{Kim:2020:ISS,
  author          = {Jin-Soo Kim and Yang Seok Ki and Erik Riedel},
  title           = {Introduction to the Special Section on Computational
                     Storage},
  journal         = j-TOS,
  volume          = {16},
  number          = {4},
  pages           = {20:1--20:1},
  month           = nov,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3425305},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3425305},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {20},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 16(4), Nov 2020, art. 21 -- storage computing for large-scale AI.
@Article{Do:2020:CEE,
  author          = {Jaeyoung Do and Victor C. Ferreira and Hossein Bobarshad
                     and Mahdi Torabzadehkashi and Siavash Rezaei and Ali
                     Heydarigorji and Diego Souza and Brunno F. Goldstein and
                     Leandro Santiago and Min Soo Kim and Priscila M. V. Lima
                     and Felipe M. G. Fran{\c{c}}a and Vladimir Alves},
  title           = {Cost-effective, Energy-efficient, and Scalable Storage
                     Computing for Large-scale {AI} Applications},
  journal         = j-TOS,
  volume          = {16},
  number          = {4},
  pages           = {21:1--21:37},
  month           = nov,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3415580},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3415580},
  abstract        = {The growing volume of data produced continuously in the
                     Cloud and at the Edge poses significant challenges for
                     large-scale AI applications to extract and learn useful
                     information from the data in a timely and efficient way.
                     The goal of this article is \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {21},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 16(4), Nov 2020, art. 22 -- data labels and asynchronous I/O.
@Article{Kougkas:2020:BSS,
  author          = {Anthony Kougkas and Hariharan Devarajan and Xian-He Sun},
  title           = {Bridging Storage Semantics Using Data Labels and
                     Asynchronous {I/O}},
  journal         = j-TOS,
  volume          = {16},
  number          = {4},
  pages           = {22:1--22:34},
  month           = nov,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3415579},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3415579},
  abstract        = {In the era of data-intensive computing, large-scale
                     applications, in both scientific and the BigData
                     communities, demonstrate unique I/O requirements leading
                     to a proliferation of different storage devices and
                     software stacks, many of which have \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {22},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 16(4), Nov 2020, art. 23 -- DeltaFS indexed massive directories.
%%% Also listed in datacompression.bib (see bibsource).
@Article{Zheng:2020:SDR,
  author          = {Qing Zheng and Charles D. Cranor and Ankush Jain and
                     Gregory R. Ganger and Garth A. Gibson and George
                     Amvrosiadis and Bradley W. Settlemyer and Gary Grider},
  title           = {Streaming Data Reorganization at Scale with {DeltaFS}
                     Indexed Massive Directories},
  journal         = j-TOS,
  volume          = {16},
  number          = {4},
  pages           = {23:1--23:31},
  month           = nov,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3415581},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                     https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3415581},
  abstract        = {Complex storage stacks providing data compression,
                     indexing, and analytics help leverage the massive amounts
                     of data generated today to derive insights. It is
                     challenging to perform this computation, however, while
                     fully utilizing the underlying storage \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {23},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 16(4), Nov 2020, art. 24 -- TH-DPMS distributed persistent memory.
@Article{Shu:2020:TDD,
  author          = {Jiwu Shu and Youmin Chen and Qing Wang and Bohong Zhu and
                     Junru Li and Youyou Lu},
  title           = {{TH-DPMS}: Design and Implementation of an
                     {RDMA}-enabled {Distributed Persistent Memory Storage
                     System}},
  journal         = j-TOS,
  volume          = {16},
  number          = {4},
  pages           = {24:1--24:31},
  month           = nov,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3412852},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3412852},
  abstract        = {The rapidly increasing data in recent years requires the
                     datacenter infrastructure to store and process data with
                     extremely high throughput and low latency. Fortunately,
                     persistent memory (PM) and RDMA technologies bring new
                     opportunities towards this \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {24},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 16(4), Nov 2020, art. 25 -- app file usage on mobile devices.
@Article{Ji:2020:ICA,
  author          = {Cheng Ji and Riwei Pan and Li-Pin Chang and Liang Shi and
                     Zongwei Zhu and Yu Liang and Tei-Wei Kuo and Chun Jason
                     Xue},
  title           = {Inspection and Characterization of App File Usage in
                     Mobile Devices},
  journal         = j-TOS,
  volume          = {16},
  number          = {4},
  pages           = {25:1--25:25},
  month           = nov,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3404119},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3404119},
  abstract        = {While the computing power of mobile devices has been
                     quickly evolving in recent years, the growth of mobile
                     storage capacity is, however, relatively slower. A common
                     problem shared by budget-phone users is that they
                     frequently run out of storage space. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {25},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 16(4), Nov 2020, art. 26 -- hybrid erasure codes.
@Article{Ye:2020:HCF,
  author          = {Liuqing Ye and Dan Feng and Yuchong Hu and Xueliang Wei},
  title           = {Hybrid Codes: Flexible Erasure Codes with Optimized
                     Recovery Performance},
  journal         = j-TOS,
  volume          = {16},
  number          = {4},
  pages           = {26:1--26:26},
  month           = nov,
  year            = {2020},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3407193},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3407193},
  abstract        = {Erasure codes are being extensively deployed in practical
                     storage systems to prevent data loss with low redundancy.
                     However, these codes require excessive disk I/Os and
                     network traffic for recovering unavailable data. Among
                     all erasure codes, Minimum \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {26},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(1), Feb 2021, art. 1 -- editorial thanks (no abstract).
@Article{Noh:2021:TTA,
  author          = {Sam H. Noh},
  title           = {Thanking the {TOS Associated Editors and Reviewers}},
  journal         = j-TOS,
  volume          = {17},
  number          = {1},
  pages           = {1:1--1:2},
  month           = feb,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3442683},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3442683},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {1},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(1), Feb 2021, art. 2 -- special-section introduction (no abstract).
@Article{Noh:2021:ISS,
  author          = {Sam H. Noh and Brent Welch},
  title           = {Introduction to the Special Section on {USENIX FAST
                     2020}},
  journal         = j-TOS,
  volume          = {17},
  number          = {1},
  pages           = {2:1--2:2},
  month           = feb,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3442685},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3442685},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {2},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(1), Feb 2021, art. 3 -- field study of enterprise SSD reliability.
@Article{Maneas:2021:RSE,
  author          = {Stathis Maneas and Kaveh Mahdaviani and Tim Emami and
                     Bianca Schroeder},
  title           = {Reliability of {SSDs} in Enterprise Storage Systems: a
                     Large-Scale Field Study},
  journal         = j-TOS,
  volume          = {17},
  number          = {1},
  pages           = {3:1--3:27},
  month           = feb,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3423088},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3423088},
  abstract        = {This article presents the first large-scale field study
                     of NAND-based SSDs in enterprise storage systems (in
                     contrast to drives in distributed data center storage
                     systems). The study is based on a very comprehensive set
                     of field data, covering 1.6 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {3},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(1), Feb 2021, art. 4 -- consistency-aware durability.
@Article{Ganesan:2021:SEC,
  author          = {Aishwarya Ganesan and Ramnatthan Alagappan and Andrea C.
                     Arpaci-Dusseau and Remzi H. Arpaci-Dusseau},
  title           = {Strong and Efficient Consistency with Consistency-aware
                     Durability},
  journal         = j-TOS,
  volume          = {17},
  number          = {1},
  pages           = {4:1--4:27},
  month           = feb,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3423138},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3423138},
  abstract        = {We introduce consistency-aware durability or Cad, a new
                     approach to durability in distributed storage that
                     enables strong consistency while delivering high
                     performance. We demonstrate the efficacy of this approach
                     by designing cross-client monotonic \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {4},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(1), Feb 2021, art. 5 -- copy-on-abundant-write file system clones.
%%% Also listed in virtual-machines.bib (see bibsource).
@Article{Zhan:2021:CAW,
  author          = {Yang Zhan and Alex Conway and Yizheng Jiao and Nirjhar
                     Mukherjee and Ian Groombridge and Michael A. Bender and
                     Martin Farach-Colton and William Jannen and Rob Johnson
                     and Donald E. Porter and Jun Yuan},
  title           = {Copy-on-Abundant-Write for Nimble File System Clones},
  journal         = j-TOS,
  volume          = {17},
  number          = {1},
  pages           = {5:1--5:27},
  month           = feb,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3423495},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib;
                     https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3423495},
  abstract        = {Making logical copies, or clones, of files and
                     directories is critical to many real-world applications
                     and workflows, including backups, virtual machines, and
                     containers. An ideal clone implementation meets the
                     following performance goals: (1) creating \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {5},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(1), Feb 2021, art. 6 -- NVMM persistent client caching for Lustre.
@Article{Cheng:2021:NOH,
  author          = {Wen Cheng and Chunyan Li and Lingfang Zeng and Yingjin
                     Qian and Xi Li and Andr{\'e} Brinkmann},
  title           = {{NVMM}-Oriented Hierarchical Persistent Client Caching
                     for {Lustre}},
  journal         = j-TOS,
  volume          = {17},
  number          = {1},
  pages           = {6:1--6:22},
  month           = feb,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3404190},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3404190},
  abstract        = {In high-performance computing (HPC), data and metadata
                     are stored on special server nodes and client
                     applications access the servers' data and metadata
                     through a network, which induces network latencies and
                     resource contention. These server nodes are \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {6},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(1), Feb 2021, art. 7 -- Kreon memory-mapped key-value store.
@Article{Papagiannis:2021:KEM,
  author          = {Anastasios Papagiannis and Giorgos Saloustros and
                     Giorgos Xanthakis and Giorgos Kalaentzis and Pilar
                     Gonzalez-Ferez and Angelos Bilas},
  title           = {{Kreon}: an Efficient Memory-Mapped Key-Value Store for
                     Flash Storage},
  journal         = j-TOS,
  volume          = {17},
  number          = {1},
  pages           = {7:1--7:32},
  month           = feb,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3418414},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3418414},
  abstract        = {Persistent key-value stores have emerged as a main
                     component in the data access path of modern data
                     processing systems. However, they exhibit high CPU and
                     I/O overhead. Nowadays, due to power limitations, it is
                     important to reduce CPU overheads for data \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {7},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(1), Feb 2021, art. 8 -- SSD-based workload characteristics.
@Article{Yadgar:2021:SBW,
  author          = {Gala Yadgar and Moshe Gabel and Shehbaz Jaffer and Bianca
                     Schroeder},
  title           = {{SSD}-based Workload Characteristics and Their
                     Performance Implications},
  journal         = j-TOS,
  volume          = {17},
  number          = {1},
  pages           = {8:1--8:26},
  month           = feb,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3423137},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Fri Feb 5 11:10:27 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3423137},
  abstract        = {Storage systems are designed and optimized relying on
                     wisdom derived from analysis studies of file-system and
                     block-level workloads. However, while SSDs are becoming a
                     dominant building block in many storage systems, their
                     design continues to build on \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {8},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(2), Jun 2021, art. 9 -- special-issue introduction (no abstract).
@Article{Gavrilovska:2021:ISI,
  author          = {Ada Gavrilovska and Erez Zadok},
  title           = {Introduction to the Special Issue on {USENIX ATC 2020}},
  journal         = j-TOS,
  volume          = {17},
  number          = {2},
  pages           = {9:1--9:2},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3457170},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Jun 16 08:47:13 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3457170},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {9},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(2), Jun 2021, art. 10 -- LSM-tree-based key-value SSDs.
%%% The author list includes the single-word name Arvind.
@Article{Im:2021:DLT,
  author          = {Junsu Im and Jinwook Bae and Chanwoo Chung and Arvind and
                     Sungjin Lee},
  title           = {Design of {LSM}-tree-based Key-value {SSDs} with Bounded
                     Tails},
  journal         = j-TOS,
  volume          = {17},
  number          = {2},
  pages           = {10:1--10:27},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3452846},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Jun 16 08:47:13 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3452846},
  abstract        = {Key-value store based on a log-structured merge-tree
                     (LSM-tree) is preferable to hash-based key-value store,
                     because an LSM-tree can support a wider variety of
                     operations and show better performance, especially for
                     writes. However, LSM-tree is difficult \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {10},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(2), Jun 2021, art. 11 -- Twizzler OS for non-volatile memory.
%%% The abstract is truncated mid-word at "memory-"; the ellipsis marks the cut.
@Article{Bittman:2021:TDC,
  author          = {Daniel Bittman and Peter Alvaro and Pankaj Mehra and
                     Darrell D. E. Long and Ethan L. Miller},
  title           = {{Twizzler}: a Data-centric {OS} for Non-volatile Memory},
  journal         = j-TOS,
  volume          = {17},
  number          = {2},
  pages           = {11:1--11:31},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3454129},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Jun 16 08:47:13 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3454129},
  abstract        = {Byte-addressable, non-volatile memory (NVM) presents an
                     opportunity to rethink the entire system stack. We
                     present Twizzler, an operating system redesign for this
                     near-future. Twizzler removes the kernel from the I/O
                     path, provides programs with memory- \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {11},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(2), Jun 2021, art. 12 -- application recovery from fsync failures.
%%% This entry carries the complete abstract, not a truncated one.
@Article{Rebello:2021:CAR,
  author          = {Anthony Rebello and Yuvraj Patel and Ramnatthan Alagappan
                     and Andrea C. Arpaci-Dusseau and Remzi H.
                     Arpaci-Dusseau},
  title           = {Can Applications Recover from fsync Failures?},
  journal         = j-TOS,
  volume          = {17},
  number          = {2},
  pages           = {12:1--12:30},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3450338},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Jun 16 08:47:13 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3450338},
  abstract        = {We analyze how file systems and modern data-intensive
                     applications react to fsync failures. First, we
                     characterize how three Linux file systems (ext4, XFS,
                     Btrfs) behave in the presence of failures. We find
                     commonalities across file systems (pages are always
                     marked clean, certain block writes always lead to
                     unavailability) as well as differences (page content and
                     failure reporting is varied). Next, we study how five
                     widely used applications (PostgreSQL, LMDB, LevelDB,
                     SQLite, Redis) handle fsync failures. Our findings show
                     that although applications use many failure-handling
                     strategies, none are sufficient: fsync failures can cause
                     catastrophic outcomes such as data loss and corruption.
                     Our findings have strong implications for the design of
                     file systems and applications that intend to provide
                     strong durability guarantees.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {12},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(2), Jun 2021, art. 13 -- repair pipelining for erasure coding.
@Article{Li:2021:RPE,
  author          = {Xiaolu Li and Zuoru Yang and Jinhong Li and Runhui Li and
                     Patrick P. C. Lee and Qun Huang and Yuchong Hu},
  title           = {Repair Pipelining for Erasure-coded Storage: Algorithms
                     and Evaluation},
  journal         = j-TOS,
  volume          = {17},
  number          = {2},
  pages           = {13:1--13:29},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3436890},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Jun 16 08:47:13 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3436890},
  abstract        = {We propose repair pipelining, a technique that speeds up
                     the repair performance in general erasure-coded storage.
                     By carefully scheduling the repair of failed data in
                     small-size units across storage nodes in a pipelined
                     manner, repair pipelining reduces \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {13},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(2), Jun 2021, art. 14 -- performance modeling of black-box SSDs.
%%% This entry carries the complete abstract, not a truncated one.
@Article{Kim:2021:PMP,
  author          = {Joonsung Kim and Kanghyun Choi and Wonsik Lee and Jangwoo
                     Kim},
  title           = {Performance Modeling and Practical Use Cases for
                     Black-Box {SSDs}},
  journal         = j-TOS,
  volume          = {17},
  number          = {2},
  pages           = {14:1--14:38},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3440022},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Jun 16 08:47:13 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3440022},
  abstract        = {Modern servers are actively deploying Solid-State Drives
                     (SSDs) thanks to their high throughput and low latency.
                     However, current server architects cannot achieve the
                     full performance potential of commodity SSDs, as SSDs are
                     complex devices designed for specific goals (e.g.,
                     latency, throughput, endurance, cost) with their internal
                     mechanisms undisclosed to users. In this article, we
                     propose SSDcheck, a novel SSD performance model to
                     extract various internal mechanisms and predict the
                     latency of next access to commodity black-box SSDs. We
                     identify key performance-critical features (e.g., garbage
                     collection, write buffering) and find their parameters
                     (i.e., size, threshold) from each SSD by using our novel
                     diagnosis code snippets. Then, SSDcheck constructs a
                     performance model for a target SSD and dynamically
                     manages the model to predict the latency of the next
                     access. In addition, SSDcheck extracts and provides other
                     useful internal mechanisms (e.g., fetch unit in
                     multi-queue SSDs, background tasks triggering idle-time
                     interval) for the storage system to fully exploit SSDs.
                     By using those useful features and the performance model,
                     we propose multiple practical use cases. Our evaluations
                     show that SSDcheck's performance model is highly
                     accurate, and proposed use cases achieve significant
                     performance improvement in various scenarios.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {14},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(2), Jun 2021, art. 15 -- Redis memory allocation with enhanced AET.
@Article{Pan:2021:PLA,
  author          = {Cheng Pan and Xiaolin Wang and Yingwei Luo and Zhenlin
                     Wang},
  title           = {Penalty- and Locality-aware Memory Allocation in {Redis}
                     Using Enhanced {AET}},
  journal         = j-TOS,
  volume          = {17},
  number          = {2},
  pages           = {15:1--15:45},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3447573},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Jun 16 08:47:13 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3447573},
  abstract        = {Due to large data volume and low latency requirements of
                     modern web services, the use of an in-memory key-value
                     (KV) cache often becomes an inevitable choice (e.g.,
                     Redis and Memcached). The in-memory cache holds hot data,
                     reduces request latency, and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {15},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(3), Aug 2021, art. 16 -- special-section introduction (no abstract).
@Article{Lu:2021:ISS,
  author          = {Shan Lu and Jon Howell},
  title           = {Introduction to the Special Section on {USENIX OSDI
                     2020}},
  journal         = j-TOS,
  volume          = {17},
  number          = {3},
  pages           = {16:1--16:1},
  month           = aug,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3479434},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Sep 15 05:45:21 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3479434},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {16},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(3), Aug 2021, art. 17 -- in-memory cache workloads at Twitter.
%%% The abstract is truncated after "However,"; the ellipsis marks the cut.
@Article{Yang:2021:LSA,
  author          = {Juncheng Yang and Yao Yue and K. V. Rashmi},
  title           = {A Large-scale Analysis of Hundreds of In-memory
                     Key-value Cache Clusters at {Twitter}},
  journal         = j-TOS,
  volume          = {17},
  number          = {3},
  pages           = {17:1--17:35},
  month           = aug,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3468521},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Sep 15 05:45:21 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3468521},
  abstract        = {Modern web services use in-memory caching extensively to
                     increase throughput and reduce latency. There have been
                     several workload analyses of production systems that have
                     fueled research in improving the effectiveness of
                     in-memory caching systems. However, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {17},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(3), Aug 2021, art. 18 -- XStore RDMA-based ordered key-value store.
%%% The abstract is truncated mid-phrase at "order-of-"; the ellipsis marks
%%% the cut.
@Article{Wei:2021:XFR,
  author          = {Xingda Wei and Rong Chen and Haibo Chen and Binyu Zang},
  title           = {{XStore}: Fast {RDMA}-Based Ordered Key--Value Store
                     Using Remote Learned Cache},
  journal         = j-TOS,
  volume          = {17},
  number          = {3},
  pages           = {18:1--18:32},
  month           = aug,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3468520},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Sep 15 05:45:21 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3468520},
  abstract        = {RDMA (Remote Direct Memory Access) has gained
                     considerable interests in network-attached in-memory
                     key-value stores. However, traversing the remote
                     tree-based index in ordered key-value stores with RDMA
                     becomes a critical obstacle, causing an order-of-
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {18},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(3), Aug 2021, art. 19 -- Octopus+ RDMA-enabled persistent memory
%%% file system.
%%% NOTE(review): the system name is written as Octopus+ (no space before the
%%% plus sign); confirm against the published title page whether the plus is
%%% typeset as a superscript.
@Article{Zhu:2021:ORE,
  author          = {Bohong Zhu and Youmin Chen and Qing Wang and Youyou Lu
                     and Jiwu Shu},
  title           = {{Octopus+}: an {RDMA}-Enabled Distributed Persistent
                     Memory File System},
  journal         = j-TOS,
  volume          = {17},
  number          = {3},
  pages           = {19:1--19:25},
  month           = aug,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3448418},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Sep 15 05:45:21 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3448418},
  abstract        = {Non-volatile memory and remote direct memory access
                     (RDMA) provide extremely high performance in storage and
                     network hardware. However, existing distributed file
                     systems strictly isolate file system and network layers,
                     and the heavy layered software \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {19},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(3), Aug 2021, art. 20 -- VM image management for persistent memory.
@Article{Zhang:2021:TVM,
  author          = {Jiachen Zhang and Lixiao Cui and Peng Li and Xiaoguang
                     Liu and Gang Wang},
  title           = {Toward Virtual Machine Image Management for Persistent
                     Memory},
  journal         = j-TOS,
  volume          = {17},
  number          = {3},
  pages           = {20:1--20:24},
  month           = aug,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3450976},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Sep 15 05:45:21 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3450976},
  abstract        = {Persistent memory's (PM) byte-addressability and high
                     capacity will also make it emerging for virtualized
                     environment. Modern virtual machine monitors virtualize
                     PM using either I/O virtualization or memory
                     virtualization. However, I/O virtualization will
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {20},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(3), Aug 2021, art. 21 -- Reparo RAID recovery for ultra-large SSDs.
@Article{Hong:2021:RFR,
  author          = {Duwon Hong and Keonsoo Ha and Minseok Ko and Myoungjun
                     Chun and Yoona Kim and Sungjin Lee and Jihong Kim},
  title           = {{Reparo}: a Fast {RAID} Recovery Scheme for Ultra-large
                     {SSDs}},
  journal         = j-TOS,
  volume          = {17},
  number          = {3},
  pages           = {21:1--21:24},
  month           = aug,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3450977},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Sep 15 05:45:21 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3450977},
  abstract        = {A recent ultra-large SSD (e.g., a 32-TB SSD) provides
                     many benefits in building cost-efficient enterprise
                     storage systems. Owing to its large capacity, however,
                     when such SSDs fail in a RAID storage system, a long
                     rebuild overhead is inevitable for RAID \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {21},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
%%% TOS 17(3), Aug 2021, art. 22 -- data placement in a federated cloud.
@Article{Chikhaoui:2021:MOO,
  author          = {Amina Chikhaoui and Laurent Lemarchand and Kamel
                     Boukhalfa and Jalil Boukhobza},
  title           = {Multi-objective Optimization of Data Placement in a
                     Storage-as-a-Service Federated Cloud},
  journal         = j-TOS,
  volume          = {17},
  number          = {3},
  pages           = {22:1--22:32},
  month           = aug,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3452741},
  ISSN            = {1553-3077 (print), 1553-3093 (electronic)},
  ISSN-L          = {1553-3077},
  bibdate         = {Wed Sep 15 05:45:21 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tos.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3452741},
  abstract        = {Cloud federation enables service providers to collaborate
                     to provide better services to customers. For cloud
                     storage services, optimizing customer object placement
                     for a member of a federation is a real challenge.
                     Storage, migration, and latency costs \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Storage},
  articleno       = {22},
  fjournal        = {ACM Transactions on Storage},
  journal-URL     = {https://dl.acm.org/loi/tos},
}
@Article{Zhang:2021:NPM,
author = "Baoquan Zhang and David H. C. Du",
title = "{NVLSM}: a Persistent Memory Key--Value Store Using
Log-Structured Merge Tree with Accumulative
Compaction",
journal = j-TOS,
volume = "17",
number = "3",
pages = "23:1--23:26",
month = aug,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453300",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Sep 15 05:45:21 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3453300",
abstract = "Computer systems utilizing byte-addressable
Non-Volatile Memory (NVM) as memory/storage can provide
low-latency data persistence. The widely used key-value
stores using Log-Structured Merge Tree (LSM-Tree) are
still beneficial for NVM systems in aspects \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "23",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Nachman:2021:GOS,
author = "Aviv Nachman and Sarai Sheinvald and Ariel Kolikant
and Gala Yadgar",
title = "{GoSeed}: Optimal Seeding Plan for Deduplicated
Storage",
journal = j-TOS,
volume = "17",
number = "3",
pages = "24:1--24:28",
month = aug,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3453301",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Sep 15 05:45:21 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3453301",
abstract = "Deduplication decreases the physical occupancy of
files in a storage volume by removing duplicate copies
of data chunks, but creates data-sharing dependencies
that complicate standard storage management tasks.
Specifically, data migration plans must \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "24",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Aguilera:2021:ISS,
author = "Marcos K. Aguilera and Gala Yadgar",
title = "Introduction to the Special Section on {USENIX FAST
2021}",
journal = j-TOS,
volume = "17",
number = "4",
pages = "25:1--25:1",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3485449",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Nov 3 09:56:08 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3485449",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "25",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Dong:2021:RED,
author = "Siying Dong and Andrew Kryczka and Yanqin Jin and
Michael Stumm",
title = "{RocksDB}: Evolution of Development Priorities in a
Key--value Store Serving Large-scale Applications",
journal = j-TOS,
volume = "17",
number = "4",
pages = "26:1--26:32",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3483840",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Nov 3 09:56:08 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3483840",
abstract = "This article is an eight-year retrospective on
development priorities for RocksDB, a key-value store
developed at Facebook that targets large-scale
distributed systems and that is optimized for Solid
State Drives (SSDs). We describe how the priorities
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "26",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Li:2021:LNS,
author = "Cheng Li and Hao Chen and Chaoyi Ruan and Xiaosong Ma
and Yinlong Xu",
title = "Leveraging {NVMe SSDs} for Building a Fast,
Cost-effective, {LSM}-tree-based {KV} Store",
journal = j-TOS,
volume = "17",
number = "4",
pages = "27:1--27:29",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3480963",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Nov 3 09:56:08 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3480963",
abstract = "Key-value (KV) stores support many crucial
applications and services. They perform fast in-memory
processing but are still often limited by I/O
performance. The recent emergence of high-speed
commodity non-volatile memory express solid-state
drives (NVMe \ldots{})",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "27",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Fukatani:2021:LDR,
author = "Takayuki Fukatani and Hieu Hanh Le and Haruo Yokota",
title = "Lightweight Dynamic Redundancy Control with Adaptive
Encoding for Server-based Storage",
journal = j-TOS,
volume = "17",
number = "4",
pages = "28:1--28:38",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3456292",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Nov 3 09:56:08 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3456292",
abstract = "With the recent performance improvements in commodity
hardware, low-cost commodity server-based storage has
become a practical alternative to dedicated-storage
appliances. Because of the high failure rate of
commodity servers, data redundancy across \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "28",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Ghoshal:2021:PAM,
author = "Devarshi Ghoshal and Lavanya Ramakrishnan",
title = "Programming Abstractions for Managing Workflows on
Tiered Storage Systems",
journal = j-TOS,
volume = "17",
number = "4",
pages = "29:1--29:21",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3457119",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Nov 3 09:56:08 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3457119",
abstract = "Scientific workflows in High Performance Computing
(HPC) environments are processing large amounts of
data. The storage hierarchy on HPC systems is getting
deeper, driven by new technologies (NVRAMs, SSDs, etc.).
There is a need for new programming \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "29",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Zhang:2021:IPD,
author = "Datong Zhang and Yuhui Deng and Yi Zhou and Yifeng Zhu
and Xiao Qin",
title = "Improving the Performance of Deduplication-Based
Backup Systems via Container Utilization Based Hot
Fingerprint Entry Distilling",
journal = j-TOS,
volume = "17",
number = "4",
pages = "30:1--30:23",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3459626",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Nov 3 09:56:08 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3459626",
abstract = "Data deduplication techniques construct an index
consisting of fingerprint entries to identify and
eliminate duplicated copies of repeating data. The
bottleneck of disk-based index lookup and data
fragmentation caused by eliminating duplicated chunks
are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "30",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Song:2021:TRN,
author = "Xiaojia Song and Tao Xie and Stephen Fischer",
title = "Two Reconfigurable {NDP} Servers: Understanding the
Impact of Near-Data Processing on Data Center
Applications",
journal = j-TOS,
volume = "17",
number = "4",
pages = "31:1--31:27",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3460201",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Nov 3 09:56:08 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3460201",
abstract = "Existing near-data processing (NDP)-powered
architectures have demonstrated their strength for some
data-intensive applications. Data center servers,
however, have to serve not only data-intensive but also
compute-intensive applications. An in-depth \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "31",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Wu:2021:FAM,
author = "Fenggang Wu and Bingzhe Li and David H. C. Du",
title = "{FluidSMR}: Adaptive Management for Hybrid {SMR}
Drives",
journal = j-TOS,
volume = "17",
number = "4",
pages = "32:1--32:30",
month = nov,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3465404",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Nov 3 09:56:08 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3465404",
abstract = "Hybrid Shingled Magnetic Recording (H-SMR) drives are
the most recently developed SMR drives, which allow
dynamic conversion of the recording format between
Conventional Magnetic Recording (CMR) and SMR on a
single disk drive. We identify the unique \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "32",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Brown:2022:ISS,
author = "Angela Demke Brown and Jay Lorch",
title = "Introduction to the Special Section on {USENIX OSDI
2021}",
journal = j-TOS,
volume = "18",
number = "1",
pages = "1:1--1:1",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3507950",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3507950",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "1",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Wang:2022:PNP,
author = "Qing Wang and Youyou Lu and Junru Li and Minhui Xie
and Jiwu Shu",
title = "\pkg{Nap}: Persistent Memory Indexes for {NUMA}
Architectures",
journal = j-TOS,
volume = "18",
number = "1",
pages = "2:1--2:35",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3507922",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3507922",
abstract = "We present Nap, a black-box approach that converts
concurrent persistent memory (PM) indexes into
non-uniform memory access (NUMA)-aware counterparts.
Based on the observation that real-world workloads
always feature skewed access patterns, Nap introduces
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "2",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Tai:2022:OSP,
author = "Amy Tai and Igor Smolyar and Michael Wei and Dan
Tsafrir",
title = "Optimizing Storage Performance with Calibrated
Interrupts",
journal = j-TOS,
volume = "18",
number = "1",
pages = "3:1--3:32",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3505139",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3505139",
abstract = "After request completion, an I/O device must decide
whether to minimize latency by immediately firing an
interrupt or to optimize for throughput by delaying the
interrupt, anticipating that more requests will
complete soon and help amortize the interrupt
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "3",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Macko:2022:SDF,
author = "Peter Macko and Jason Hennessey",
title = "Survey of Distributed File System Design Choices",
journal = j-TOS,
volume = "18",
number = "1",
pages = "4:1--4:34",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3465405",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3465405",
abstract = "Decades of research on distributed file systems and
storage systems exist. New researchers and engineers
have a lot of literature to study, but only a
comparatively small number of high-level design choices
are available when creating a distributed file system.
And within each aspect of the system, typically several
common approaches are used. So, rather than surveying
distributed file systems, this article presents a
survey of important design decisions and, within those
decisions, the most commonly used options. It also
presents a qualitative exploration of their tradeoffs.
We include several relatively recent designs and their
variations that illustrate other tradeoff choices in
the design space, despite being underexplored. In doing
so, we provide a primer on distributed file systems,
and we also show areas that are overexplored and
underexplored, in the hopes of inspiring new
research.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "4",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Litz:2022:PRP,
author = "Heiner Litz and Javier Gonzalez and Ana Klimovic and
Christos Kozyrakis",
title = "\pkg{RAIL}: Predictable, Low Tail Latency for {NVMe}
Flash",
journal = j-TOS,
volume = "18",
number = "1",
pages = "5:1--5:21",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3465406",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3465406",
abstract = "Flash-based storage is replacing disk for an
increasing number of data center applications,
providing orders of magnitude higher throughput and
lower average latency. However, applications also
require predictable storage latency. Existing Flash
devices \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "5",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Wang:2022:EEB,
author = "Shucheng Wang and Ziyi Lu and Qiang Cao and Hong Jiang
and Jie Yao and Yuanyuan Dong and Puyuan Yang and
Changsheng Xie",
title = "Exploration and Exploitation for Buffer-Controlled
{HDD}-Writes for {SSD--HDD} Hybrid Storage Server",
journal = j-TOS,
volume = "18",
number = "1",
pages = "6:1--6:29",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3465410",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3465410",
abstract = "Hybrid storage servers combining solid-state drives
(SSDs) and hard-drive disks (HDDs) provide
cost-effectiveness and $ \mu $s-level responsiveness
for applications. However, observations from cloud
storage system Pangu manifest that HDDs are often
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "6",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Li:2022:PBP,
author = "Jun Li and Xiaofei Xu and Zhigang Cai and Jianwei Liao
and Kenli Li and Balazs Gerofi and Yutaka Ishikawa",
title = "Pattern-Based Prefetching with Adaptive Cache
Management Inside of Solid-State Drives",
journal = j-TOS,
volume = "18",
number = "1",
pages = "7:1--7:25",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3474393",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3474393",
abstract = "This article proposes a pattern-based prefetching
scheme with the support of adaptive cache management,
at the flash translation layer of solid-state drives
(SSDs). It works inside of SSDs and has features of OS
dependence and uses transparency. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "7",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Wang:2022:CLI,
author = "Zhaoguo Wang and Haibo Chen and Youyun Wang and Chuzhe
Tang and Huan Wang",
title = "The Concurrent Learned Indexes for Multicore Data
Storage",
journal = j-TOS,
volume = "18",
number = "1",
pages = "8:1--8:35",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3478289",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3478289",
abstract = "We present XIndex, which is a concurrent index library
and designed for fast queries. It includes a concurrent
ordered index (XIndex-R) and a concurrent hash index
(XIndex-H). Similar to a recent proposal of the learned
index, the indexes in XIndex use \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "8",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Gao:2022:RTF,
author = "Congming Gao and Min Ye and Chun Jason Xue and Youtao
Zhang and Liang Shi and Jiwu Shu and Jun Yang",
title = "Reprogramming {$3$D} {TLC} Flash Memory based Solid
State Drives",
journal = j-TOS,
volume = "18",
number = "1",
pages = "9:1--9:33",
month = feb,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3487064",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Fri Mar 4 08:42:28 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3487064",
abstract = "NAND flash memory-based SSDs have been widely adopted.
The scaling of SSD has evolved from planar (2D) to 3D
stacking. For reliability and other reasons, the
technology node in 3D NAND SSD is larger than in 2D,
but data density can be increased via \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "9",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Calciu:2022:ISS,
author = "Irina Calciu and Geoff Kuenning",
title = "Introduction to the Special Section on {USENIX ATC
2021}",
journal = j-TOS,
volume = "18",
number = "2",
pages = "10:1--10:2",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3519550",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3519550",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "10",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Zuo:2022:ROS,
author = "Pengfei Zuo and Qihui Zhou and Jiazhao Sun and Liu
Yang and Shuangwu Zhang and Yu Hua and James Cheng and
Rongfeng He and Huabing Yan",
title = "{RACE}: One-sided {RDMA}-conscious Extendible
Hashing",
journal = j-TOS,
volume = "18",
number = "2",
pages = "11:1--11:29",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3511895",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3511895",
abstract = "Memory disaggregation is a promising technique in
datacenters with the benefit of improving resource
utilization, failure isolation, and elasticity. Hashing
indexes have been widely used to provide fast lookup
services in distributed memory systems. However,
traditional hashing indexes become inefficient for
disaggregated memory, since the computing power in the
memory pool is too weak to execute complex index
requests. To provide efficient indexing services in
disaggregated memory scenarios, this article proposes
RACE hashing, a one-sided RDMA-Conscious Extendible
hashing index with lock-free remote concurrency control
and efficient remote resizing. RACE hashing enables all
index operations to be efficiently executed by using
only one-sided RDMA verbs without involving any compute
resource in the memory pool. To support remote
concurrent access with high performance, RACE hashing
leverages a lock-free remote concurrency control scheme
to enable different clients to concurrently operate the
same hashing index in the memory pool in a lock-free
manner. To resize the hash table with low overheads,
RACE hashing leverages an extendible remote resizing
scheme to reduce extra RDMA accesses caused by
extendible resizing and allow concurrent request
execution during resizing. Extensive experimental
results demonstrate that RACE hashing outperforms
state-of-the-art distributed in-memory hashing indexes
by 1.4--13.7$ \times $ in YCSB hybrid workloads.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "11",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kwon:2022:SFF,
author = "Dongup Kwon and Wonsik Lee and Dongryeong Kim and
Junehyuk Boo and Jangwoo Kim",
title = "{SmartFVM}: a Fast, Flexible, and Scalable
Hardware-based Virtualization for Commodity Storage
Devices",
journal = j-TOS,
volume = "18",
number = "2",
pages = "12:1--12:27",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3511213",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
URL = "https://dl.acm.org/doi/10.1145/3511213",
abstract = "A computational storage device incorporating a
computation unit inside or near its storage unit is a
highly promising technology to maximize a storage
server's performance. However, to apply such
computational storage devices and take their full
potential in virtualized environments, server
architects must resolve a fundamental challenge:
cost-effective virtualization. This critical challenge
can be directly addressed by the following questions:
(1) how to virtualize two different hardware units
(i.e., computation and storage), and (2) how to
integrate them to construct virtual computational
storage devices, and (3) how to provide them to users.
However, the existing methods for computational storage
virtualization severely suffer from their low
performance and high costs due to the lack of
hardware-assisted virtualization support.\par
In this work, we propose SmartFVM-Engine, an FPGA card
designed to maximize the performance and
cost-effectiveness of computational storage
virtualization. SmartFVM-Engine introduces three key
ideas to achieve the design goals. First, it achieves
high virtualization performance by applying
hardware-assisted virtualization to both computation
and storage units. Second, it further improves the
performance by applying hardware-assisted resource
orchestration for the virtualized units. Third, it
achieves high cost-effectiveness by dynamically
constructing and scheduling virtual computational
storage devices. To the best of our knowledge, this is
the first work to implement a hardware-assisted
virtualization mechanism for modern computational
storage devices.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "12",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Kassa:2022:POD,
author = "Hiwot Tadese Kassa and Jason Akers and Mrinmoy Ghosh
and Zhichao Cao and Vaibhav Gogte and Ronald
Dreslinski",
title = "Power-optimized Deployment of Key-value Stores Using
Storage Class Memory",
journal = j-TOS,
volume = "18",
number = "2",
pages = "13:1--13:26",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3511905",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3511905",
abstract = "High-performance flash-based key-value stores in
data-centers utilize large amounts of DRAM to cache hot
data. However, motivated by the high cost and power
consumption of DRAM, server designs with lower
DRAM-per-compute ratio are becoming popular. These
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "13",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Han:2022:SFR,
author = "Runzhou Han and Om Rameshwar Gatla and Mai Zheng and
Jinrui Cao and Di Zhang and Dong Dai and Yong Chen and
Jonathan Cook",
title = "A Study of Failure Recovery and Logging of
High-Performance Parallel File Systems",
journal = j-TOS,
volume = "18",
number = "2",
pages = "14:1--14:44",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3483447",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3483447",
abstract = "Large-scale parallel file systems (PFSs) play an
essential role in high-performance computing (HPC).
However, despite their importance, their reliability is
much less studied or understood compared with that of
local storage systems or cloud storage \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "14",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Cao:2022:HSC,
author = "Zhichao Cao and Huibing Dong and Yixun Wei and Shiyong
Liu and David H. C. Du",
title = "{IS-HBase}: an In-Storage Computing Optimized {HBase}
with {I/O} Offloading and Self-Adaptive Caching in
Compute-Storage Disaggregated Infrastructure",
journal = j-TOS,
volume = "18",
number = "2",
pages = "15:1--15:42",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3488368",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3488368",
abstract = "Active storage devices and in-storage computing are
proposed and developed in recent years to effectively
reduce the amount of required data traffic and to
improve the overall application performance. They are
especially preferred in the compute-storage \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "15",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Liu:2022:CSP,
author = "Weihua Liu and Fei Wu and Xiang Chen and Meng Zhang
and Yu Wang and Xiangfeng Lu and Changsheng Xie",
title = "Characterization Summary of Performance, Reliability,
and Threshold Voltage Distribution of {$3$D}
Charge-Trap {NAND} Flash Memory",
journal = j-TOS,
volume = "18",
number = "2",
pages = "16:1--16:25",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3491230",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3491230",
abstract = "Solid-state drive (SSD) gradually dominates in the
high-performance storage scenarios. Three-dimension
(3D) NAND flash memory owning high-storage capacity is
becoming a mainstream storage component of SSD.
However, the interferences of the new 3D charge
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "16",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Bhimani:2022:ASI,
author = "Janki Bhimani and Zhengyu Yang and Jingpei Yang and
Adnan Maruf and Ningfang Mi and Rajinikanth Pandurangan
and Changho Choi and Vijay Balakrishnan",
title = "Automatic Stream Identification to Improve Flash
Endurance in Data Centers",
journal = j-TOS,
volume = "18",
number = "2",
pages = "17:1--17:29",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3470007",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3470007",
abstract = "The demand for high performance I/O in
Storage-as-a-Service (SaaS) is increasing day by day.
To address this demand, NAND Flash-based Solid-state
Drives (SSDs) are commonly used in data centers as
cache- or top-tiers in the storage rack ascribe to
their \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "17",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Ge:2022:HFS,
author = "Xiongzi Ge and Zhichao Cao and David H. C. Du and
Pradeep Ganesan and Dennis Hahn",
title = "{HintStor}: a Framework to Study {I/O} Hints in
Heterogeneous Storage",
journal = j-TOS,
volume = "18",
number = "2",
pages = "18:1--18:24",
month = may,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3489143",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon May 9 06:54:11 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3489143",
abstract = "To bridge the giant semantic gap between applications
and modern storage systems, passing a piece of tiny and
useful information, called I/O access hints, from upper
layers to the storage layer may greatly improve
application performance and ease data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "18",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Noh:2022:ISS,
author = "Sam H. Noh",
title = "Introduction to the Special Section on {SOSP 2021}",
journal = j-TOS,
volume = "18",
number = "3",
pages = "19:1--19:1",
month = aug,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3542850",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Wed Sep 28 10:41:23 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3542850",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Storage",
articleno = "19",
fjournal = "ACM Transactions on Storage",
journal-URL = "https://dl.acm.org/loi/tos",
}
@Article{Ganesan:2022:ENE,
  author          = "Aishwarya Ganesan and Ramnatthan Alagappan and Anthony
                     Rebello and Andrea C. Arpaci-Dusseau and Remzi H.
                     Arpaci-Dusseau",
  title           = "Exploiting Nil-external Interfaces for Fast Replicated
                     Storage",
  journal         = j-TOS,
  volume          = "18",
  number          = "3",
  pages           = "20:1--20:35",
  month           = aug,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3542821",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Wed Sep 28 10:41:23 MDT 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3542821",
  abstract        = "Do some storage interfaces enable higher performance
                     than others? Can one identify and exploit such
                     interfaces to realize high performance in storage
                     systems? This article answers these questions in the
                     affirmative by identifying nil-externality, a
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "20",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{McAllister:2022:KTP,
  author          = "Sara McAllister and Benjamin Berg and Julian
                     Tutuncu-Macias and Juncheng Yang and Sathya Gunasekar
                     and Jimmy Lu and Daniel S. Berger and Nathan Beckmann
                     and Gregory R. Ganger",
  title           = "{Kangaroo}: Theory and Practice of Caching Billions of
                     Tiny Objects on Flash",
  journal         = j-TOS,
  volume          = "18",
  number          = "3",
  pages           = "21:1--21:33",
  month           = aug,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3542928",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Wed Sep 28 10:41:23 MDT 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3542928",
  abstract        = "Many social-media and IoT services have very large
                     working sets consisting of billions of tiny ($ \approx
                     $ 100 B) objects. Large, flash-based caches are
                     important to serving these working sets at acceptable
                     monetary cost. However, caching tiny objects on flash
                     is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "21",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Lembke:2022:DIF,
  author          = "James Lembke and Pierre-Louis Roman and Patrick
                     Eugster",
  title           = "{DEFUSE}: an Interface for Fast and Correct User Space
                     File System Access",
  journal         = j-TOS,
  volume          = "18",
  number          = "3",
  pages           = "22:1--22:29",
  month           = aug,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3494556",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Wed Sep 28 10:41:23 MDT 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3494556",
  abstract        = "Traditionally, the only option for developers was to
                     implement file systems (FSs) via drivers within the
                     operating system kernel. However, there exists a
                     growing number of file systems (FSs), notably
                     distributed FSs for the cloud, whose interfaces are
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "22",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Zhang:2022:BGF,
  author          = "Yiwen Zhang and Ting Yao and Jiguang Wan and
                     Changsheng Xie",
  title           = "Building {GC}-free Key--value Store on {HM-SMR} Drives
                     with {ZoneFS}",
  journal         = j-TOS,
  volume          = "18",
  number          = "3",
  pages           = "23:1--23:23",
  month           = aug,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3502846",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Wed Sep 28 10:41:23 MDT 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3502846",
  abstract        = "Host-managed shingled magnetic recording drives
                     (HM-SMR) are advantageous in capacity to harness the
                     explosive growth of data. For key-value (KV) stores
                     based on log-structured merge trees (LSM-trees), the
                     HM-SMR drive is an ideal solution owing to its
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "23",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
  remark          = "Abstract: the misspelling `owning to' has been
                     corrected to `owing to'.",
}
@Article{Zheng:2022:WBD,
  author          = "Jianwei Zheng and Zhenhua Li and Yuanhui Qiu and Hao
                     Lin and He Xiao and Yang Li and Yunhao Liu",
  title           = "{WebAssembly}-based Delta Sync for Cloud Storage
                     Services",
  journal         = j-TOS,
  volume          = "18",
  number          = "3",
  pages           = "24:1--24:31",
  month           = aug,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3502847",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Wed Sep 28 10:41:23 MDT 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3502847",
  abstract        = "Delta synchronization (sync) is crucial to the
                     network-level efficiency of cloud storage services,
                     especially when handling large files with small
                     increments. Practical delta sync techniques are,
                     however, only available for PC clients and mobile apps,
                     but \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "24",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Zou:2022:HDS,
  author          = "Xiangyu Zou and Jingsong Yuan and Philip Shilane and
                     Wen Xia and Haijun Zhang and Xuan Wang",
  title           = "From Hyper-dimensional Structures to Linear
                     Structures: Maintaining Deduplicated Data's Locality",
  journal         = j-TOS,
  volume          = "18",
  number          = "3",
  pages           = "25:1--25:28",
  month           = aug,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3507921",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Wed Sep 28 10:41:23 MDT 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3507921",
  abstract        = "Data deduplication is widely used to reduce the size
                     of backup workloads, but it has the known disadvantage
                     of causing poor data locality, also referred to as the
                     fragmentation problem. This results from the gap
                     between the hyper-dimensional structure of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "25",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{May:2022:DGE,
  author          = "Michael J. May",
  title           = "{Donag}: Generating Efficient Patches and Diffs for
                     Compressed Archives",
  journal         = j-TOS,
  volume          = "18",
  number          = "3",
  pages           = "26:1--26:41",
  month           = aug,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3507919",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Wed Sep 28 10:41:23 MDT 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3507919",
  abstract        = "Differencing between compressed archives is a common
                     task in file management and synchronization.
                     Applications include source code distribution,
                     application updates, and document synchronization.
                     General purpose binary differencing tools can create
                     and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "26",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Einziger:2022:LRS,
  author          = "Gil Einziger and Ohad Eytan and Roy Friedman and
                     Benjamin Manes",
  title           = "Lightweight Robust Size Aware Cache Management",
  journal         = j-TOS,
  volume          = "18",
  number          = "3",
  pages           = "27:1--27:23",
  month           = aug,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3507920",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Wed Sep 28 10:41:23 MDT 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3507920",
  abstract        = "Modern key-value stores, object stores, Internet proxy
                     caches, and Content Delivery Networks (CDN) often
                     manage objects of diverse sizes, e.g., blobs, video
                     files of different lengths, images with varying
                     resolutions, and small documents. In such \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "27",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Dean:2022:ISS,
  author          = "Dean Hildebrand and Donald Porter",
  title           = "Introduction to the Special Section on {USENIX FAST
                     2022}",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "28:1--28:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3564770",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3564770",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "28",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
  remark          = "The first author's name was previously recorded in
                     inverted order (`Hildebrand Dean'); it has been
                     corrected to Dean Hildebrand. The citation label is
                     deliberately unchanged so that existing citations of
                     this entry remain valid.",
}
@Article{Jaffer:2022:IEN,
  author          = "Shehbaz Jaffer and Kaveh Mahdaviani and Bianca
                     Schroeder",
  title           = "Improving the Endurance of Next Generation {SSD}'s
                     using {WOM-v} Codes",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "29:1--29:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3565027",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3565027",
  abstract        = "High density Solid State Drives, such as QLC drives,
                     offer increased storage capacity, but a magnitude lower
                     Program and Erase (P/E) cycles, limiting their
                     endurance and hence usability. We present the design
                     and implementation of non-binary, Voltage-Based \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "29",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
  remark          = "Abstract: the truncated excerpt previously ended in a
                     dangling hyphen and stray period; the compound has
                     been completed as `Voltage-Based' (to be confirmed
                     against the published abstract).",
}
@Article{Li:2022:CRF,
  author          = "Ruibin Li and Xiang Ren and Xu Zhao and Siwei He and
                     Michael Stumm and Ding Yuan",
  title           = "{ctFS}: Replacing File Indexing with Hardware Memory
                     Translation through Contiguous File Allocation for
                     Persistent Memory",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "30:1--30:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3565026",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3565026",
  abstract        = "Persistent byte-addressable memory (PM) is poised to
                     become prevalent in future computer systems. PMs are
                     significantly faster than disk storage, and accesses to
                     PMs are governed by the Memory Management Unit (MMU)
                     just as accesses with volatile RAM. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "30",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Kisous:2022:WMG,
  author          = "Roei Kisous and Ariel Kolikant and Abhinav Duggal and
                     Sarai Sheinvald and Gala Yadgar",
  title           = "The what, The from, and The to: The Migration Games in
                     Deduplicated Systems",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "31:1--31:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3565025",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3565025",
  abstract        = "Deduplication reduces the size of the data stored in
                     large-scale storage systems by replacing duplicate data
                     blocks with references to their unique copies. This
                     creates dependencies between files that contain similar
                     content and complicates the management \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "31",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Yang:2022:TED,
  author          = "Zuoru Yang and Jingwei Li and Yanjing Ren and Patrick
                     P. C. Lee",
  title           = "Tunable Encrypted Deduplication with Attack-resilient
                     Key Management",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "32:1--32:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3510614",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3510614",
  abstract        = "Conventional encrypted deduplication approaches retain
                     the deduplication capability on duplicate chunks after
                     encryption by always deriving the key for
                     encryption/decryption from the chunk content, but such
                     a deterministic nature causes information \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "32",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Nicolaou:2022:AAR,
  author          = "Nicolas Nicolaou and Viveck Cadambe and N. Prakash and
                     Andria Trigeorgi and Kishori Konwar and Muriel
                     M{\'e}dard and Nancy Lynch",
  title           = "{Ares}: Adaptive, Reconfigurable, Erasure coded, Atomic
                     Storage",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "33:1--33:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3510613",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3510613",
  abstract        = "Emulating a shared atomic, read/write storage system
                     is a fundamental problem in distributed computing.
                     Replicating atomic objects among a set of data hosts
                     was the norm for traditional implementations (e.g.,
                     [11]) in order to guarantee the availability \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "33",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
  remark          = "The accent in the surname M{\'e}dard has been restored,
                     the system name in the title is brace-protected against
                     style recasing, and extraction-induced spaces inside
                     the bracketed reference number in the abstract have
                     been removed.",
}
@Article{Lawson:2022:EAS,
  author          = "Margaret Lawson and William Gropp and Jay Lofstead",
  title           = "{EMPRESS}: Accelerating Scientific Discovery through
                     Descriptive Metadata Management",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "34:1--34:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3523698",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3523698",
  abstract        = "High-performance computing scientists are producing
                     unprecedented volumes of data that take a long time to
                     load for analysis. However, many analyses only require
                     loading in the data containing particular features of
                     interest and scientists have many \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "34",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Zhou:2022:DFP,
  author          = "Yang Zhou and Fang Wang and Dan Feng",
  title           = "A Disk Failure Prediction Method Based on Active
                     Semi-supervised Learning",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "35:1--35:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3523699",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3523699",
  abstract        = "Disk failure has always been a major problem for data
                     centers, leading to data loss. Current disk failure
                     prediction approaches are mostly offline and assume
                     that the disk labels required for training learning
                     models are available and accurate. However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "35",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Wang:2022:TFS,
  author          = "Rui Wang and Yongkun Li and Yinlong Xu and Hong Xie
                     and John C. S. Lui and Shuibing He",
  title           = "Toward Fast and Scalable Random Walks over
                     Disk-Resident Graphs via Efficient {I/O} Management",
  journal         = j-TOS,
  volume          = "18",
  number          = "4",
  pages           = "36:1--36:??",
  month           = nov,
  year            = "2022",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3533579",
  ISSN            = "1553-3077 (print), 1553-3093 (electronic)",
  ISSN-L          = "1553-3077",
  bibdate         = "Tue Dec 20 07:58:36 MST 2022",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3533579",
  abstract        = "Traditional graph systems mainly use the
                     iteration-based model, which iteratively loads graph
                     blocks into memory for analysis so as to reduce random
                     I/Os. However, this iteration-based model limits the
                     efficiency and scalability of running random walk,
                     \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Storage",
  articleno       = "36",
  fjournal        = "ACM Transactions on Storage",
  journal-URL     = "https://dl.acm.org/loi/tos",
}
@Article{Anonymous:2023:ECM,
author = "Anonymous",
title = "{Editor-in-Chief} Message",
journal = j-TOS,
volume = "19",
number = "1",
pages = "1:1--1:1",
month = feb,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3574325",
ISSN = "1553-3077 (print), 1553-3093 (electronic)",
ISSN-L = "1553-3077",
bibdate = "Mon Apr 17 12:00:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tos.bib",
URL = "https://dl.acm.org/doi/10.1145/3574325",
acknowledgement =